Moved temp variables inside loops
This commit is contained in:
@@ -1643,75 +1643,52 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X;
|
||||
|
||||
if (ii<inum) {
|
||||
numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
|
||||
//numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
|
||||
acctyp fdip_buf[32];
|
||||
|
||||
int j,k,m;
|
||||
numtyp v0,v1,v2,v3;
|
||||
numtyp u0,u1,u2,u3;
|
||||
numtyp t0,t1,t2,t3;
|
||||
numtyp t0_1,t0_2,t1_1,t1_2;
|
||||
numtyp t2_1,t2_2,tq_1,tq_2;
|
||||
numtyp tu00,tu10,tu01,tu20,tu11;
|
||||
numtyp tu02,tu30,tu21,tu12,tu03;
|
||||
numtyp tu00_1,tu01_1,tu10_1;
|
||||
numtyp tu00_2,tu01_2,tu10_2;
|
||||
numtyp tu20_1,tu11_1,tu02_1;
|
||||
numtyp tu20_2,tu11_2,tu02_2;
|
||||
numtyp tuv100_1,tuv010_1,tuv001_1;
|
||||
numtyp tuv100_2,tuv010_2,tuv001_2;
|
||||
numtyp tuv200_1,tuv020_1,tuv002_1;
|
||||
numtyp tuv110_1,tuv101_1,tuv011_1;
|
||||
numtyp tuv200_2,tuv020_2,tuv002_2;
|
||||
numtyp tuv110_2,tuv101_2,tuv011_2;
|
||||
numtyp tuv000,tuv100,tuv010,tuv001;
|
||||
numtyp tuv200,tuv020,tuv002,tuv110;
|
||||
numtyp tuv101,tuv011,tuv300,tuv030;
|
||||
numtyp tuv003,tuv210,tuv201,tuv120;
|
||||
numtyp tuv021,tuv102,tuv012,tuv111;
|
||||
|
||||
int j,k,m;
|
||||
int nlpts = (bsorder-1) / 2;
|
||||
|
||||
// extract the permanent multipole field at each site
|
||||
|
||||
tuv100_1 = (numtyp)0.0;
|
||||
tuv010_1 = (numtyp)0.0;
|
||||
tuv001_1 = (numtyp)0.0;
|
||||
tuv200_1 = (numtyp)0.0;
|
||||
tuv020_1 = (numtyp)0.0;
|
||||
tuv002_1 = (numtyp)0.0;
|
||||
tuv110_1 = (numtyp)0.0;
|
||||
tuv101_1 = (numtyp)0.0;
|
||||
tuv011_1 = (numtyp)0.0;
|
||||
tuv100_2 = (numtyp)0.0;
|
||||
tuv010_2 = (numtyp)0.0;
|
||||
tuv001_2 = (numtyp)0.0;
|
||||
tuv200_2 = (numtyp)0.0;
|
||||
tuv020_2 = (numtyp)0.0;
|
||||
tuv002_2 = (numtyp)0.0;
|
||||
tuv110_2 = (numtyp)0.0;
|
||||
tuv101_2 = (numtyp)0.0;
|
||||
tuv011_2 = (numtyp)0.0;
|
||||
tuv000 = (numtyp)0.0;
|
||||
tuv001 = (numtyp)0.0;
|
||||
tuv010 = (numtyp)0.0;
|
||||
tuv100 = (numtyp)0.0;
|
||||
tuv200 = (numtyp)0.0;
|
||||
tuv020 = (numtyp)0.0;
|
||||
tuv002 = (numtyp)0.0;
|
||||
tuv110 = (numtyp)0.0;
|
||||
tuv101 = (numtyp)0.0;
|
||||
tuv011 = (numtyp)0.0;
|
||||
tuv300 = (numtyp)0.0;
|
||||
tuv030 = (numtyp)0.0;
|
||||
tuv003 = (numtyp)0.0;
|
||||
tuv210 = (numtyp)0.0;
|
||||
tuv201 = (numtyp)0.0;
|
||||
tuv120 = (numtyp)0.0;
|
||||
tuv021 = (numtyp)0.0;
|
||||
tuv102 = (numtyp)0.0;
|
||||
tuv012 = (numtyp)0.0;
|
||||
tuv111 = (numtyp)0.0;
|
||||
numtyp tuv100_1 = (numtyp)0.0;
|
||||
numtyp tuv010_1 = (numtyp)0.0;
|
||||
numtyp tuv001_1 = (numtyp)0.0;
|
||||
numtyp tuv200_1 = (numtyp)0.0;
|
||||
numtyp tuv020_1 = (numtyp)0.0;
|
||||
numtyp tuv002_1 = (numtyp)0.0;
|
||||
numtyp tuv110_1 = (numtyp)0.0;
|
||||
numtyp tuv101_1 = (numtyp)0.0;
|
||||
numtyp tuv011_1 = (numtyp)0.0;
|
||||
numtyp tuv100_2 = (numtyp)0.0;
|
||||
numtyp tuv010_2 = (numtyp)0.0;
|
||||
numtyp tuv001_2 = (numtyp)0.0;
|
||||
numtyp tuv200_2 = (numtyp)0.0;
|
||||
numtyp tuv020_2 = (numtyp)0.0;
|
||||
numtyp tuv002_2 = (numtyp)0.0;
|
||||
numtyp tuv110_2 = (numtyp)0.0;
|
||||
numtyp tuv101_2 = (numtyp)0.0;
|
||||
numtyp tuv011_2 = (numtyp)0.0;
|
||||
numtyp tuv000 = (numtyp)0.0;
|
||||
numtyp tuv001 = (numtyp)0.0;
|
||||
numtyp tuv010 = (numtyp)0.0;
|
||||
numtyp tuv100 = (numtyp)0.0;
|
||||
numtyp tuv200 = (numtyp)0.0;
|
||||
numtyp tuv020 = (numtyp)0.0;
|
||||
numtyp tuv002 = (numtyp)0.0;
|
||||
numtyp tuv110 = (numtyp)0.0;
|
||||
numtyp tuv101 = (numtyp)0.0;
|
||||
numtyp tuv011 = (numtyp)0.0;
|
||||
numtyp tuv300 = (numtyp)0.0;
|
||||
numtyp tuv030 = (numtyp)0.0;
|
||||
numtyp tuv003 = (numtyp)0.0;
|
||||
numtyp tuv210 = (numtyp)0.0;
|
||||
numtyp tuv201 = (numtyp)0.0;
|
||||
numtyp tuv120 = (numtyp)0.0;
|
||||
numtyp tuv021 = (numtyp)0.0;
|
||||
numtyp tuv102 = (numtyp)0.0;
|
||||
numtyp tuv012 = (numtyp)0.0;
|
||||
numtyp tuv111 = (numtyp)0.0;
|
||||
|
||||
k = igrid[4*ii+2] - nzlo_out - nlpts;
|
||||
for (int kb = 0; kb < bsorder; kb++) {
|
||||
@@ -1722,32 +1699,32 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
v3 = thetai3[m][kb][3];
|
||||
*/
|
||||
int i3 = ii*4*bsorder + 4*kb;
|
||||
v0 = thetai3[i3];
|
||||
v1 = thetai3[i3+1];
|
||||
v2 = thetai3[i3+2];
|
||||
v3 = thetai3[i3+3];
|
||||
tu00_1 = (numtyp)0.0;
|
||||
tu01_1 = (numtyp)0.0;
|
||||
tu10_1 = (numtyp)0.0;
|
||||
tu20_1 = (numtyp)0.0;
|
||||
tu11_1 = (numtyp)0.0;
|
||||
tu02_1 = (numtyp)0.0;
|
||||
tu00_2 = (numtyp)0.0;
|
||||
tu01_2 = (numtyp)0.0;
|
||||
tu10_2 = (numtyp)0.0;
|
||||
tu20_2 = (numtyp)0.0;
|
||||
tu11_2 = (numtyp)0.0;
|
||||
tu02_2 = (numtyp)0.0;
|
||||
tu00 = (numtyp)0.0;
|
||||
tu10 = (numtyp)0.0;
|
||||
tu01 = (numtyp)0.0;
|
||||
tu20 = (numtyp)0.0;
|
||||
tu11 = (numtyp)0.0;
|
||||
tu02 = (numtyp)0.0;
|
||||
tu30 = (numtyp)0.0;
|
||||
tu21 = (numtyp)0.0;
|
||||
tu12 = (numtyp)0.0;
|
||||
tu03 = (numtyp)0.0;
|
||||
numtyp v0 = thetai3[i3];
|
||||
numtyp v1 = thetai3[i3+1];
|
||||
numtyp v2 = thetai3[i3+2];
|
||||
numtyp v3 = thetai3[i3+3];
|
||||
numtyp tu00_1 = (numtyp)0.0;
|
||||
numtyp tu01_1 = (numtyp)0.0;
|
||||
numtyp tu10_1 = (numtyp)0.0;
|
||||
numtyp tu20_1 = (numtyp)0.0;
|
||||
numtyp tu11_1 = (numtyp)0.0;
|
||||
numtyp tu02_1 = (numtyp)0.0;
|
||||
numtyp tu00_2 = (numtyp)0.0;
|
||||
numtyp tu01_2 = (numtyp)0.0;
|
||||
numtyp tu10_2 = (numtyp)0.0;
|
||||
numtyp tu20_2 = (numtyp)0.0;
|
||||
numtyp tu11_2 = (numtyp)0.0;
|
||||
numtyp tu02_2 = (numtyp)0.0;
|
||||
numtyp tu00 = (numtyp)0.0;
|
||||
numtyp tu10 = (numtyp)0.0;
|
||||
numtyp tu01 = (numtyp)0.0;
|
||||
numtyp tu20 = (numtyp)0.0;
|
||||
numtyp tu11 = (numtyp)0.0;
|
||||
numtyp tu02 = (numtyp)0.0;
|
||||
numtyp tu30 = (numtyp)0.0;
|
||||
numtyp tu21 = (numtyp)0.0;
|
||||
numtyp tu12 = (numtyp)0.0;
|
||||
numtyp tu03 = (numtyp)0.0;
|
||||
|
||||
j = igrid[4*ii+1] - nylo_out - nlpts;
|
||||
for (int jb = 0; jb < bsorder; jb++) {
|
||||
@@ -1758,17 +1735,17 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
u3 = thetai2[m][jb][3];
|
||||
*/
|
||||
int i2 = ii*4*bsorder+4*jb;
|
||||
u0 = thetai2[i2];
|
||||
u1 = thetai2[i2+1];
|
||||
u2 = thetai2[i2+2];
|
||||
u3 = thetai2[i2+3];
|
||||
t0_1 = (numtyp)0.0;
|
||||
t1_1 = (numtyp)0.0;
|
||||
t2_1 = (numtyp)0.0;
|
||||
t0_2 = (numtyp)0.0;
|
||||
t1_2 = (numtyp)0.0;
|
||||
t2_2 = (numtyp)0.0;
|
||||
t3 = (numtyp)0.0;
|
||||
numtyp u0 = thetai2[i2];
|
||||
numtyp u1 = thetai2[i2+1];
|
||||
numtyp u2 = thetai2[i2+2];
|
||||
numtyp u3 = thetai2[i2+3];
|
||||
numtyp t0_1 = (numtyp)0.0;
|
||||
numtyp t1_1 = (numtyp)0.0;
|
||||
numtyp t2_1 = (numtyp)0.0;
|
||||
numtyp t0_2 = (numtyp)0.0;
|
||||
numtyp t1_2 = (numtyp)0.0;
|
||||
numtyp t2_2 = (numtyp)0.0;
|
||||
numtyp t3 = (numtyp)0.0;
|
||||
|
||||
int i = igrid[4*ii] - nxlo_out - nlpts;
|
||||
for (int ib = 0; ib < bsorder; ib++) {
|
||||
@@ -1789,8 +1766,8 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
numtyp w2 = thetai1[i1+2];
|
||||
numtyp w3 = thetai1[i1+3];
|
||||
int gidx = 2*(k*ngridxy + j*ngridx + i);
|
||||
tq_1 = grid[gidx];
|
||||
tq_2 = grid[gidx+1];
|
||||
numtyp tq_1 = grid[gidx];
|
||||
numtyp tq_2 = grid[gidx+1];
|
||||
t0_1 += tq_1*w0;
|
||||
t1_1 += tq_1*w1;
|
||||
t2_1 += tq_1*w2;
|
||||
@@ -1813,9 +1790,9 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
||||
tu20_2 += t2_2*u0;
|
||||
tu11_2 += t1_2*u1;
|
||||
tu02_2 += t0_2*u2;
|
||||
t0 = t0_1 + t0_2;
|
||||
t1 = t1_1 + t1_2;
|
||||
t2 = t2_1 + t2_2;
|
||||
numtyp t0 = t0_1 + t0_2;
|
||||
numtyp t1 = t1_1 + t1_2;
|
||||
numtyp t2 = t2_1 + t2_2;
|
||||
tu00 += t0*u0;
|
||||
tu10 += t1*u0;
|
||||
tu01 += t0*u1;
|
||||
|
||||
Reference in New Issue
Block a user