diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index ab750aaadc..cc593e4263 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -1639,10 +1639,6 @@ __kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1, const int nxlo_out, const int ngridxy, const int ngridx) { - //int tid, ii, offset, i, n_stride; - //atom_info(t_per_atom,ii,tid,offset); - - int tid=THREAD_ID_X; int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X; @@ -1763,23 +1759,17 @@ __kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1, */ const int i1 = istart + ib; const numtyp4 tha1 = thetai1[i1]; - /* - const numtyp w0 = tha1.x; - const numtyp w1 = tha1.y; - const numtyp w2 = tha1.z; - const numtyp w3 = tha1.w; - */ const int gidx = my + i; // k*ngridxy + j*ngridx + i; const numtyp2 tq = grid[gidx]; const numtyp tq_1 = tq.x; //grid[gidx]; const numtyp tq_2 = tq.y; //grid[gidx+1]; - t0_1 += tq_1*tha1.x; // w0 - t1_1 += tq_1*tha1.y; // w1 - t2_1 += tq_1*tha1.z; // w2 - t0_2 += tq_2*tha1.x; // w0 - t1_2 += tq_2*tha1.y; // w1 - t2_2 += tq_2*tha1.z; // w2 - t3 += (tq_1+tq_2)*tha1.w; // w3 + t0_1 += tq_1*tha1.x; + t1_1 += tq_1*tha1.y; + t2_1 += tq_1*tha1.z; + t0_2 += tq_2*tha1.x; + t1_2 += tq_2*tha1.y; + t2_2 += tq_2*tha1.z; + t3 += (tq_1+tq_2)*tha1.w; i++; } diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 6b977cb638..fa0670a757 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -1078,6 +1078,7 @@ void PairAmoebaGPU::umutual1(double **field, double **fieldp) double ****gridpost = (double ****) ic_kspace->post_convolution(); // get potential + time0 = MPI_Wtime(); fphi_uind(gridpost,fdip_phi1,fdip_phi2,fdip_sum_phi); diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index b874c656c3..49a83e75be 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -1170,22 +1170,24 @@ void PairHippoGPU::umutual1(double **field, double **fieldp) fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2]; fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2]; } -/* - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - fuind[i][j] = a[j][0]*uind[i][0] + a[j][1]*uind[i][1] + a[j][2]*uind[i][2]; - fuinp[i][j] = a[j][0]*uinp[i][0] + a[j][1]*uinp[i][1] + a[j][2]*uinp[i][2]; - } - } -*/ + + double time0, time1; + // gridpre = my portion of 4d grid in brick decomp w/ ghost values double ****gridpre = (double ****) ic_kspace->zero(); // map 2 values to grid + + MPI_Barrier(world); + time0 = MPI_Wtime(); + grid_uind(fuind,fuinp,gridpre); + time1 = MPI_Wtime(); + time_grid_uind += (time1 - time0); + // pre-convolution operations including forward FFT // gridfft = my portion of complex 3d grid in FFT decomposition @@ -1222,9 +1224,7 @@ void PairHippoGPU::umutual1(double **field, double **fieldp) double ****gridpost = (double ****) ic_kspace->post_convolution(); // get potential - double time0, time1; - MPI_Barrier(world); time0 = MPI_Wtime(); fphi_uind(gridpost,fdip_phi1,fdip_phi2,fdip_sum_phi);