diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu
index 836f05660d..2e29ca721b 100644
--- a/lib/gpu/lal_tersoff.cu
+++ b/lib/gpu/lal_tersoff.cu
@@ -308,8 +308,6 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
       delr1.z = jx.z-ix.z;
       numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z;
 
-//      if (rsq1 > cutsq[ijparam]) continue;
-
       // compute zeta_ij
       z = (acctyp)0;
 
@@ -355,13 +353,9 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
                   rsq1, rsq2, delr1, delr2);
       }
 
-      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acc_zeta(z, tid, t_per_atom, offset_k);
 
       numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -585,14 +579,9 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
       numtyp r1inv = ucl_rsqrt(rsq1);
 
       // look up for zeta_ij
-
-      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
       numtyp force = zeta_ij.x*tpainv;
       numtyp prefactor = zeta_ij.y;
@@ -823,13 +812,9 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -891,13 +876,10 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
         f.y += fi[1];
         f.z += fi[2];
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
+
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
         int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1068,13 +1050,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -1143,13 +1121,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
         virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
         virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
 
diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu
index dfb94c4145..c85f5e08ca 100644
--- a/lib/gpu/lal_tersoff_mod.cu
+++ b/lib/gpu/lal_tersoff_mod.cu
@@ -356,13 +356,9 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
                   ijkparam_c5, rsq1, rsq2, delr1, delr2);
       }
 
-      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acc_zeta(z, tid, t_per_atom, offset_k);
 
       numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -587,14 +583,9 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
       numtyp r1inv = ucl_rsqrt(rsq1);
 
       // look up for zeta_ij
-
-      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
       numtyp force = zeta_ij.x*tpainv;
       numtyp prefactor = zeta_ij.y;
@@ -831,13 +822,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -902,13 +889,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
         f.y += fi[1];
         f.z += fi[2];
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
         int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1085,13 +1068,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -1163,13 +1142,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
         virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
         virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
 
diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu
index 73ff51c704..b574a529c0 100644
--- a/lib/gpu/lal_tersoff_zbl.cu
+++ b/lib/gpu/lal_tersoff_zbl.cu
@@ -359,13 +359,9 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
                   rsq1, rsq2, delr1, delr2);
       }
 
-      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acc_zeta(z, tid, t_per_atom, offset_k);
 
       numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -603,14 +599,9 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
       numtyp r1inv = ucl_rsqrt(rsq1);
 
       // look up for zeta_ij
-
-      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
-      //int idx = jj*n_stride + i*t_per_atom + offset_j;
-      //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
       int idx = nbor_j;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               i, nbor_j, offset_j, idx);
       acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
       numtyp force = zeta_ij.x*tpainv;
       numtyp prefactor = zeta_ij.y;
@@ -841,13 +832,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -909,13 +896,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
         f.y += fi[1];
         f.z += fi[2];
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
         int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1086,13 +1069,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
         offset_kf = red_acc[2*m+1];
       }
 
-      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
-      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
-      //idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
+      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
       int idx = ijnum;
       if (dev_packed==dev_nbor) idx -= n_stride;
-//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//               j, ijnum, offset_kf, idx);
       acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
       numtyp force = zeta_ji.x*tpainv;
       numtyp prefactor_ji = zeta_ji.y;
@@ -1161,13 +1140,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
         virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
         virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
 
-        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
-        //int idx = kk*n_stride + j*t_per_atom + offset_k;
-        //idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
+        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
         int idx = nbor_k;
         if (dev_packed==dev_nbor) idx -= n_stride;
-//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
-//                 j, nbor_k, offset_k, idx);
         acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
         numtyp prefactor_jk = zeta_jk.y;
 
diff --git a/lib/gpu/lal_yukawa_colloid.cu b/lib/gpu/lal_yukawa_colloid.cu
index 48ab47bc94..a3cbbbc11c 100644
--- a/lib/gpu/lal_yukawa_colloid.cu
+++ b/lib/gpu/lal_yukawa_colloid.cu
@@ -89,10 +89,10 @@ __kernel void k_yukawa_colloid(const __global numtyp4 *restrict x_,
       if (rsq<coeff[mtype].z) {
         numtyp r = ucl_sqrt(rsq);
         numtyp rinv = ucl_recip(r);
-              numtyp screening = ucl_exp(-kappa*(r-(radi+radj)));
-              numtyp force = coeff[mtype].x * screening;
+        numtyp screening = ucl_exp(-kappa*(r-(radi+radj)));
+        numtyp force = coeff[mtype].x * screening;
 
-              force = factor_lj*force * rinv;
+        force = factor_lj*force * rinv;
 
         f.x+=delx*force;
         f.y+=dely*force;
@@ -181,10 +181,10 @@ __kernel void k_yukawa_colloid_fast(const __global numtyp4 *restrict x_,
       if (rsq<coeff[mtype].z) {
         numtyp r = ucl_sqrt(rsq);
         numtyp rinv = ucl_recip(r);
-              numtyp screening = ucl_exp(-kappa*(r-(radi+radj)));
-              numtyp force = coeff[mtype].x * screening;
+        numtyp screening = ucl_exp(-kappa*(r-(radi+radj)));
+        numtyp force = coeff[mtype].x * screening;
 
-              force = factor_lj*force * rinv;
+        force = factor_lj*force * rinv;
 
         f.x+=delx*force;
         f.y+=dely*force;
diff --git a/lib/gpu/lal_zbl.cu b/lib/gpu/lal_zbl.cu
index b7f379c833..33c850e134 100644
--- a/lib/gpu/lal_zbl.cu
+++ b/lib/gpu/lal_zbl.cu
@@ -129,16 +129,13 @@ __kernel void k_zbl(const __global numtyp4 *restrict x_,
       int mtype=itype*lj_types+jtype;
       if (rsq<cut_globalsq) {
         numtyp r, t, force;
-
         r = ucl_sqrt(rsq);
         force = dzbldr(r, coeff2[mtype].x, coeff2[mtype].y,
                        coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);
-
-              if (rsq>cut_innersq) {
-                t = r - cut_inner;
-                force = t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
-              }
-
+        if (rsq>cut_innersq) {
+          t = r - cut_inner;
+          force = t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
+        }
         force *= (numtyp)-1.0*ucl_recip(r);
 
         f.x+=delx*force;
@@ -148,11 +145,10 @@ __kernel void k_zbl(const __global numtyp4 *restrict x_,
         if (eflag>0) {
           numtyp e=e_zbl(r, coeff2[mtype].x, coeff2[mtype].y,
                          coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);
-                 e += coeff3[mtype].z;
-                if (rsq > cut_innersq) {
-                  e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
-                }
-
+          e += coeff3[mtype].z;
+          if (rsq > cut_innersq) {
+            e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
+          }
           energy+=e;
         }
         if (vflag>0) {
@@ -232,15 +228,13 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_,
 
       if (rsq<cut_globalsq) {
         numtyp r, t, force;
-
         r = ucl_sqrt(rsq);
         force = dzbldr(r, coeff2[mtype].x, coeff2[mtype].y,
                        coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);
-
-              if (rsq>cut_innersq) {
-                t = r - cut_inner;
-                force += t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
-              }
+        if (rsq>cut_innersq) {
+          t = r - cut_inner;
+          force += t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
+        }
 
         force *= (numtyp)-1.0*ucl_recip(r);
 
@@ -251,11 +245,10 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_,
         if (eflag>0) {
           numtyp e=e_zbl(r, coeff2[mtype].x, coeff2[mtype].y,
                          coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);
-                 e += coeff3[mtype].z;
-                if (rsq > cut_innersq) {
-                  e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
-                }
-
+          e += coeff3[mtype].z;
+          if (rsq > cut_innersq) {
+            e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
+          }
           energy+=e;
         }
         if (vflag>0) {