diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index 836f05660d..2e29ca721b 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -308,8 +308,6 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, delr1.z = jx.z-ix.z; numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z; -// if (rsq1 > cutsq[ijparam]) continue; - // compute zeta_ij z = (acctyp)0; @@ -355,13 +353,9 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, rsq1, rsq2, delr1, delr2); } - //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); @@ -585,14 +579,9 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, numtyp r1inv = ucl_rsqrt(rsq1); // look up for zeta_ij - - //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -823,13 +812,9 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -891,13 +876,10 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, f.y += fi[1]; f.z += fi[2]; - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); + acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype]; @@ -1068,13 +1050,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -1143,13 +1121,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]); virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]); - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu index dfb94c4145..c85f5e08ca 100644 --- a/lib/gpu/lal_tersoff_mod.cu +++ b/lib/gpu/lal_tersoff_mod.cu @@ -356,13 +356,9 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_, ijkparam_c5, rsq1, rsq2, delr1, delr2); } - //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); @@ -587,14 +583,9 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, numtyp r1inv = ucl_rsqrt(rsq1); // look up for zeta_ij - - //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -831,13 +822,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -902,13 +889,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, f.y += fi[1]; f.z += fi[2]; - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype]; @@ -1085,13 +1068,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -1163,13 +1142,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]); virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]); - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu index 73ff51c704..b574a529c0 100644 --- a/lib/gpu/lal_tersoff_zbl.cu +++ b/lib/gpu/lal_tersoff_zbl.cu @@ -359,13 +359,9 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_, rsq1, rsq2, delr1, delr2); } - //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); @@ -603,14 +599,9 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, numtyp r1inv = ucl_rsqrt(rsq1); // look up for zeta_ij - - //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; - //int idx = jj*n_stride + i*t_per_atom + offset_j; - //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor int idx = nbor_j; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -841,13 +832,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -909,13 +896,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, f.y += fi[1]; f.z += fi[2]; - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype]; @@ -1086,13 +1069,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, offset_kf = red_acc[2*m+1]; } - //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride; - //int idx = iix*n_stride + j*t_per_atom + offset_kf; - //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor int idx = ijnum; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, ijnum, offset_kf, idx); acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex); numtyp force = zeta_ji.x*tpainv; numtyp prefactor_ji = zeta_ji.y; @@ -1161,13 +1140,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]); virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]); - //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride; - //int idx = kk*n_stride + j*t_per_atom + offset_k; - //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor + // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor int idx = nbor_k; if (dev_packed==dev_nbor) idx -= n_stride; -// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, -// j, nbor_k, offset_k, idx); acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex); numtyp prefactor_jk = zeta_jk.y; diff --git a/lib/gpu/lal_yukawa_colloid.cu b/lib/gpu/lal_yukawa_colloid.cu index 48ab47bc94..a3cbbbc11c 100644 --- a/lib/gpu/lal_yukawa_colloid.cu +++ b/lib/gpu/lal_yukawa_colloid.cu @@ -89,10 +89,10 @@ __kernel void k_yukawa_colloid(const __global numtyp4 *restrict x_, if (rsqcut_innersq) { - t = r - cut_inner; - force = t*t * (coeff1[mtype].x + coeff1[mtype].y*t); - } - + if (rsq>cut_innersq) { + t = r - cut_inner; + force = t*t * (coeff1[mtype].x + coeff1[mtype].y*t); + } force *= (numtyp)-1.0*ucl_recip(r); f.x+=delx*force; @@ -148,11 +145,10 @@ __kernel void k_zbl(const __global numtyp4 *restrict x_, if (eflag>0) { numtyp e=e_zbl(r, coeff2[mtype].x, coeff2[mtype].y, coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z); - e += coeff3[mtype].z; - if (rsq > cut_innersq) { - e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t); - } - + e += coeff3[mtype].z; + if (rsq > cut_innersq) { + e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t); + } energy+=e; } if (vflag>0) { @@ -232,15 +228,13 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_, if (rsqcut_innersq) { - t = r - cut_inner; - force += t*t * (coeff1[mtype].x + coeff1[mtype].y*t); - } + if (rsq>cut_innersq) { + t = r - cut_inner; + force += t*t * (coeff1[mtype].x + coeff1[mtype].y*t); + } force *= (numtyp)-1.0*ucl_recip(r); @@ -251,11 +245,10 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_, if (eflag>0) { numtyp e=e_zbl(r, coeff2[mtype].x, coeff2[mtype].y, coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z); - e += coeff3[mtype].z; - if (rsq > cut_innersq) { - e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t); - } - + e += coeff3[mtype].z; + if (rsq > cut_innersq) { + e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t); + } energy+=e; } if (vflag>0) {