diff --git a/lib/gpu/lal_lj_tip4p_long.cu b/lib/gpu/lal_lj_tip4p_long.cu index 66c9a6071e..782ae43662 100644 --- a/lib/gpu/lal_lj_tip4p_long.cu +++ b/lib/gpu/lal_lj_tip4p_long.cu @@ -110,10 +110,8 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, const int typeO, const int typeH, const numtyp alpha, const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) { - int tid, ii, offset; - atom_info(t_per_atom,ii,tid,offset); - int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid; + int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; @@ -122,6 +120,8 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, int itype = ix.w; acctyp4 fM, vM; acctyp eM; + // placement of the virial in engv depends on eflag value + int engv_iter = eflag ? 2 : 0; if (itype == typeH) { int iO = hneigh[i*4]; if (iO < inum) { @@ -131,13 +131,13 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, f.z += fM.z * (acctyp)0.5 * alpha; if (vflag > 0) { vM = ansO[inum +iO]; - engv[inum*2 + i] += vM.x * (acctyp)0.5 * alpha; - engv[inum*3 + i] += vM.y * (acctyp)0.5 * alpha; - engv[inum*4 + i] += vM.z * (acctyp)0.5 * alpha; + engv[inum*engv_iter + i] += vM.x * (acctyp)0.5 * alpha; engv_iter++; + engv[inum*engv_iter + i] += vM.y * (acctyp)0.5 * alpha; engv_iter++; + engv[inum*engv_iter + i] += vM.z * (acctyp)0.5 * alpha; engv_iter++; vM = ansO[inum*2+iO]; - engv[inum*5 + i] += vM.x * (acctyp)0.5 * alpha; - engv[inum*6 + i] += vM.y * (acctyp)0.5 * alpha; - engv[inum*7 + i] += vM.z * (acctyp)0.5 * alpha; + engv[inum*engv_iter + i] += vM.x * (acctyp)0.5 * alpha; engv_iter++; + engv[inum*engv_iter + i] += vM.y * (acctyp)0.5 * alpha; engv_iter++; + engv[inum*engv_iter + i] += vM.z * (acctyp)0.5 * alpha; } } } else { @@ -155,13 +155,13 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, } if (vflag > 0) { vM = ansO[inum + i]; - engv[inum*2 + i] += vM.x * (acctyp)(1 - alpha); - engv[inum*3 + i] += vM.y * (acctyp)(1 - alpha); - engv[inum*4 + i] += vM.z * (acctyp)(1 - alpha); + engv[inum*engv_iter + i] += vM.x * (acctyp)(1 - alpha); engv_iter++; + engv[inum*engv_iter + i] += vM.y * (acctyp)(1 - alpha); engv_iter++; + engv[inum*engv_iter + i] += vM.z * (acctyp)(1 - alpha); engv_iter++; vM = ansO[inum*2 + i]; - engv[inum*5 + i] += vM.x * (acctyp)(1 - alpha); - engv[inum*6 + i] += vM.y * (acctyp)(1 - alpha); - engv[inum*7 + i] += vM.z * (acctyp)(1 - alpha); + engv[inum*engv_iter + i] += vM.x * (acctyp)(1 - alpha); engv_iter++; + engv[inum*engv_iter + i] += vM.y * (acctyp)(1 - alpha); engv_iter++; + engv[inum*engv_iter + i] += vM.z * (acctyp)(1 - alpha); } } acctyp4 old=ans[i]; @@ -182,9 +182,8 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_, const int typeO, const int typeH, const __global tagint *restrict tag, const __global int *restrict map, const __global int *restrict sametag) { - int tid, ii, offset; - atom_info(t_per_atom,ii,tid,offset); - int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid; + + int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X; if (inlocal + atom->nghost; int inum, host_start;