Moved temp variables inside loops

This commit is contained in:
Trung Nguyen
2022-09-10 02:45:06 -05:00
parent 363b6c51d0
commit 5e59c95be4

View File

@@ -1643,75 +1643,52 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X;
if (ii<inum) {
numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
//numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
acctyp fdip_buf[32];
int j,k,m;
numtyp v0,v1,v2,v3;
numtyp u0,u1,u2,u3;
numtyp t0,t1,t2,t3;
numtyp t0_1,t0_2,t1_1,t1_2;
numtyp t2_1,t2_2,tq_1,tq_2;
numtyp tu00,tu10,tu01,tu20,tu11;
numtyp tu02,tu30,tu21,tu12,tu03;
numtyp tu00_1,tu01_1,tu10_1;
numtyp tu00_2,tu01_2,tu10_2;
numtyp tu20_1,tu11_1,tu02_1;
numtyp tu20_2,tu11_2,tu02_2;
numtyp tuv100_1,tuv010_1,tuv001_1;
numtyp tuv100_2,tuv010_2,tuv001_2;
numtyp tuv200_1,tuv020_1,tuv002_1;
numtyp tuv110_1,tuv101_1,tuv011_1;
numtyp tuv200_2,tuv020_2,tuv002_2;
numtyp tuv110_2,tuv101_2,tuv011_2;
numtyp tuv000,tuv100,tuv010,tuv001;
numtyp tuv200,tuv020,tuv002,tuv110;
numtyp tuv101,tuv011,tuv300,tuv030;
numtyp tuv003,tuv210,tuv201,tuv120;
numtyp tuv021,tuv102,tuv012,tuv111;
int j,k,m;
int nlpts = (bsorder-1) / 2;
// extract the permanent multipole field at each site
tuv100_1 = (numtyp)0.0;
tuv010_1 = (numtyp)0.0;
tuv001_1 = (numtyp)0.0;
tuv200_1 = (numtyp)0.0;
tuv020_1 = (numtyp)0.0;
tuv002_1 = (numtyp)0.0;
tuv110_1 = (numtyp)0.0;
tuv101_1 = (numtyp)0.0;
tuv011_1 = (numtyp)0.0;
tuv100_2 = (numtyp)0.0;
tuv010_2 = (numtyp)0.0;
tuv001_2 = (numtyp)0.0;
tuv200_2 = (numtyp)0.0;
tuv020_2 = (numtyp)0.0;
tuv002_2 = (numtyp)0.0;
tuv110_2 = (numtyp)0.0;
tuv101_2 = (numtyp)0.0;
tuv011_2 = (numtyp)0.0;
tuv000 = (numtyp)0.0;
tuv001 = (numtyp)0.0;
tuv010 = (numtyp)0.0;
tuv100 = (numtyp)0.0;
tuv200 = (numtyp)0.0;
tuv020 = (numtyp)0.0;
tuv002 = (numtyp)0.0;
tuv110 = (numtyp)0.0;
tuv101 = (numtyp)0.0;
tuv011 = (numtyp)0.0;
tuv300 = (numtyp)0.0;
tuv030 = (numtyp)0.0;
tuv003 = (numtyp)0.0;
tuv210 = (numtyp)0.0;
tuv201 = (numtyp)0.0;
tuv120 = (numtyp)0.0;
tuv021 = (numtyp)0.0;
tuv102 = (numtyp)0.0;
tuv012 = (numtyp)0.0;
tuv111 = (numtyp)0.0;
numtyp tuv100_1 = (numtyp)0.0;
numtyp tuv010_1 = (numtyp)0.0;
numtyp tuv001_1 = (numtyp)0.0;
numtyp tuv200_1 = (numtyp)0.0;
numtyp tuv020_1 = (numtyp)0.0;
numtyp tuv002_1 = (numtyp)0.0;
numtyp tuv110_1 = (numtyp)0.0;
numtyp tuv101_1 = (numtyp)0.0;
numtyp tuv011_1 = (numtyp)0.0;
numtyp tuv100_2 = (numtyp)0.0;
numtyp tuv010_2 = (numtyp)0.0;
numtyp tuv001_2 = (numtyp)0.0;
numtyp tuv200_2 = (numtyp)0.0;
numtyp tuv020_2 = (numtyp)0.0;
numtyp tuv002_2 = (numtyp)0.0;
numtyp tuv110_2 = (numtyp)0.0;
numtyp tuv101_2 = (numtyp)0.0;
numtyp tuv011_2 = (numtyp)0.0;
numtyp tuv000 = (numtyp)0.0;
numtyp tuv001 = (numtyp)0.0;
numtyp tuv010 = (numtyp)0.0;
numtyp tuv100 = (numtyp)0.0;
numtyp tuv200 = (numtyp)0.0;
numtyp tuv020 = (numtyp)0.0;
numtyp tuv002 = (numtyp)0.0;
numtyp tuv110 = (numtyp)0.0;
numtyp tuv101 = (numtyp)0.0;
numtyp tuv011 = (numtyp)0.0;
numtyp tuv300 = (numtyp)0.0;
numtyp tuv030 = (numtyp)0.0;
numtyp tuv003 = (numtyp)0.0;
numtyp tuv210 = (numtyp)0.0;
numtyp tuv201 = (numtyp)0.0;
numtyp tuv120 = (numtyp)0.0;
numtyp tuv021 = (numtyp)0.0;
numtyp tuv102 = (numtyp)0.0;
numtyp tuv012 = (numtyp)0.0;
numtyp tuv111 = (numtyp)0.0;
k = igrid[4*ii+2] - nzlo_out - nlpts;
for (int kb = 0; kb < bsorder; kb++) {
@@ -1722,32 +1699,32 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
v3 = thetai3[m][kb][3];
*/
int i3 = ii*4*bsorder + 4*kb;
v0 = thetai3[i3];
v1 = thetai3[i3+1];
v2 = thetai3[i3+2];
v3 = thetai3[i3+3];
tu00_1 = (numtyp)0.0;
tu01_1 = (numtyp)0.0;
tu10_1 = (numtyp)0.0;
tu20_1 = (numtyp)0.0;
tu11_1 = (numtyp)0.0;
tu02_1 = (numtyp)0.0;
tu00_2 = (numtyp)0.0;
tu01_2 = (numtyp)0.0;
tu10_2 = (numtyp)0.0;
tu20_2 = (numtyp)0.0;
tu11_2 = (numtyp)0.0;
tu02_2 = (numtyp)0.0;
tu00 = (numtyp)0.0;
tu10 = (numtyp)0.0;
tu01 = (numtyp)0.0;
tu20 = (numtyp)0.0;
tu11 = (numtyp)0.0;
tu02 = (numtyp)0.0;
tu30 = (numtyp)0.0;
tu21 = (numtyp)0.0;
tu12 = (numtyp)0.0;
tu03 = (numtyp)0.0;
numtyp v0 = thetai3[i3];
numtyp v1 = thetai3[i3+1];
numtyp v2 = thetai3[i3+2];
numtyp v3 = thetai3[i3+3];
numtyp tu00_1 = (numtyp)0.0;
numtyp tu01_1 = (numtyp)0.0;
numtyp tu10_1 = (numtyp)0.0;
numtyp tu20_1 = (numtyp)0.0;
numtyp tu11_1 = (numtyp)0.0;
numtyp tu02_1 = (numtyp)0.0;
numtyp tu00_2 = (numtyp)0.0;
numtyp tu01_2 = (numtyp)0.0;
numtyp tu10_2 = (numtyp)0.0;
numtyp tu20_2 = (numtyp)0.0;
numtyp tu11_2 = (numtyp)0.0;
numtyp tu02_2 = (numtyp)0.0;
numtyp tu00 = (numtyp)0.0;
numtyp tu10 = (numtyp)0.0;
numtyp tu01 = (numtyp)0.0;
numtyp tu20 = (numtyp)0.0;
numtyp tu11 = (numtyp)0.0;
numtyp tu02 = (numtyp)0.0;
numtyp tu30 = (numtyp)0.0;
numtyp tu21 = (numtyp)0.0;
numtyp tu12 = (numtyp)0.0;
numtyp tu03 = (numtyp)0.0;
j = igrid[4*ii+1] - nylo_out - nlpts;
for (int jb = 0; jb < bsorder; jb++) {
@@ -1758,17 +1735,17 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
u3 = thetai2[m][jb][3];
*/
int i2 = ii*4*bsorder+4*jb;
u0 = thetai2[i2];
u1 = thetai2[i2+1];
u2 = thetai2[i2+2];
u3 = thetai2[i2+3];
t0_1 = (numtyp)0.0;
t1_1 = (numtyp)0.0;
t2_1 = (numtyp)0.0;
t0_2 = (numtyp)0.0;
t1_2 = (numtyp)0.0;
t2_2 = (numtyp)0.0;
t3 = (numtyp)0.0;
numtyp u0 = thetai2[i2];
numtyp u1 = thetai2[i2+1];
numtyp u2 = thetai2[i2+2];
numtyp u3 = thetai2[i2+3];
numtyp t0_1 = (numtyp)0.0;
numtyp t1_1 = (numtyp)0.0;
numtyp t2_1 = (numtyp)0.0;
numtyp t0_2 = (numtyp)0.0;
numtyp t1_2 = (numtyp)0.0;
numtyp t2_2 = (numtyp)0.0;
numtyp t3 = (numtyp)0.0;
int i = igrid[4*ii] - nxlo_out - nlpts;
for (int ib = 0; ib < bsorder; ib++) {
@@ -1789,8 +1766,8 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
numtyp w2 = thetai1[i1+2];
numtyp w3 = thetai1[i1+3];
int gidx = 2*(k*ngridxy + j*ngridx + i);
tq_1 = grid[gidx];
tq_2 = grid[gidx+1];
numtyp tq_1 = grid[gidx];
numtyp tq_2 = grid[gidx+1];
t0_1 += tq_1*w0;
t1_1 += tq_1*w1;
t2_1 += tq_1*w2;
@@ -1813,9 +1790,9 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
tu20_2 += t2_2*u0;
tu11_2 += t1_2*u1;
tu02_2 += t0_2*u2;
t0 = t0_1 + t0_2;
t1 = t1_1 + t1_2;
t2 = t2_1 + t2_2;
numtyp t0 = t0_1 + t0_2;
numtyp t1 = t1_1 + t1_2;
numtyp t2 = t2_1 + t2_2;
tu00 += t0*u0;
tu10 += t1*u0;
tu01 += t0*u1;