diff --git a/src/INTEL/angle_charmm_intel.cpp b/src/INTEL/angle_charmm_intel.cpp
index 1bdf2acb29..48e179c997 100644
--- a/src/INTEL/angle_charmm_intel.cpp
+++ b/src/INTEL/angle_charmm_intel.cpp
@@ -166,10 +166,10 @@ void AngleCharmmIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = anglelist[n].a;
-      const int i2 = anglelist[n].b;
-      const int i3 = anglelist[n].c;
-      const int type = anglelist[n].t;
+      const int i1 = IP_PRE_dword_index(anglelist[n].a);
+      const int i2 = IP_PRE_dword_index(anglelist[n].b);
+      const int i3 = IP_PRE_dword_index(anglelist[n].c);
+      const int type = IP_PRE_dword_index(anglelist[n].t);
 
       // 1st bond
 
diff --git a/src/INTEL/angle_harmonic_intel.cpp b/src/INTEL/angle_harmonic_intel.cpp
index 8d3d44f528..57dc152fbf 100644
--- a/src/INTEL/angle_harmonic_intel.cpp
+++ b/src/INTEL/angle_harmonic_intel.cpp
@@ -166,10 +166,10 @@ void AngleHarmonicIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = anglelist[n].a;
-      const int i2 = anglelist[n].b;
-      const int i3 = anglelist[n].c;
-      const int type = anglelist[n].t;
+      const int i1 = IP_PRE_dword_index(anglelist[n].a);
+      const int i2 = IP_PRE_dword_index(anglelist[n].b);
+      const int i3 = IP_PRE_dword_index(anglelist[n].c);
+      const int type = IP_PRE_dword_index(anglelist[n].t);
 
       // 1st bond
 
diff --git a/src/INTEL/bond_fene_intel.cpp b/src/INTEL/bond_fene_intel.cpp
index 5cd14dc230..12579372e9 100644
--- a/src/INTEL/bond_fene_intel.cpp
+++ b/src/INTEL/bond_fene_intel.cpp
@@ -163,9 +163,9 @@ void BondFENEIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = bondlist[n].a;
-      const int i2 = bondlist[n].b;
-      const int type = bondlist[n].t;
+      const int i1 = IP_PRE_dword_index(bondlist[n].a);
+      const int i2 = IP_PRE_dword_index(bondlist[n].b);
+      const int type = IP_PRE_dword_index(bondlist[n].t);
 
       const flt_t ir0sq = fc.fc[type].ir0sq;
       const flt_t k = fc.fc[type].k;
diff --git a/src/INTEL/bond_harmonic_intel.cpp b/src/INTEL/bond_harmonic_intel.cpp
index 4cb6ba3a8a..b3283f63bc 100644
--- a/src/INTEL/bond_harmonic_intel.cpp
+++ b/src/INTEL/bond_harmonic_intel.cpp
@@ -159,9 +159,9 @@ void BondHarmonicIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = bondlist[n].a;
-      const int i2 = bondlist[n].b;
-      const int type = bondlist[n].t;
+      const int i1 = IP_PRE_dword_index(bondlist[n].a);
+      const int i2 = IP_PRE_dword_index(bondlist[n].b);
+      const int type = IP_PRE_dword_index(bondlist[n].t);
 
       const flt_t delx = x[i1].x - x[i2].x;
       const flt_t dely = x[i1].y - x[i2].y;
diff --git a/src/INTEL/dihedral_charmm_intel.cpp b/src/INTEL/dihedral_charmm_intel.cpp
index b36e5ab505..2b3f8e2208 100644
--- a/src/INTEL/dihedral_charmm_intel.cpp
+++ b/src/INTEL/dihedral_charmm_intel.cpp
@@ -195,11 +195,11 @@ void DihedralCharmmIntel::eval(const int vflag,
     for (int n = nfrom; n < nto; n++) {
     #endif
     for (int n = nfrom; n < nto; n += npl) {
-      const int i1 = dihedrallist[n].a;
-      const int i2 = dihedrallist[n].b;
-      const int i3 = dihedrallist[n].c;
-      const int i4 = dihedrallist[n].d;
-      const int type = dihedrallist[n].t;
+      const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
+      const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
+      const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
+      const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
+      const int type = IP_PRE_dword_index(dihedrallist[n].t);
 
       // 1st bond
 
diff --git a/src/INTEL/dihedral_harmonic_intel.cpp b/src/INTEL/dihedral_harmonic_intel.cpp
index 9b504be3ca..831b3a94f4 100644
--- a/src/INTEL/dihedral_harmonic_intel.cpp
+++ b/src/INTEL/dihedral_harmonic_intel.cpp
@@ -163,11 +163,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = dihedrallist[n].a;
-      const int i2 = dihedrallist[n].b;
-      const int i3 = dihedrallist[n].c;
-      const int i4 = dihedrallist[n].d;
-      const int type = dihedrallist[n].t;
+      const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
+      const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
+      const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
+      const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
+      const int type = IP_PRE_dword_index(dihedrallist[n].t);
 
       // 1st bond
 
diff --git a/src/INTEL/dihedral_opls_intel.cpp b/src/INTEL/dihedral_opls_intel.cpp
index db0618abec..ff54923c34 100644
--- a/src/INTEL/dihedral_opls_intel.cpp
+++ b/src/INTEL/dihedral_opls_intel.cpp
@@ -167,11 +167,11 @@ void DihedralOPLSIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = dihedrallist[n].a;
-      const int i2 = dihedrallist[n].b;
-      const int i3 = dihedrallist[n].c;
-      const int i4 = dihedrallist[n].d;
-      const int type = dihedrallist[n].t;
+      const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
+      const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
+      const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
+      const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
+      const int type = IP_PRE_dword_index(dihedrallist[n].t);
 
       // 1st bond
 
diff --git a/src/INTEL/fix_nh_intel.cpp b/src/INTEL/fix_nh_intel.cpp
index a6f2ec0478..2f10b63922 100644
--- a/src/INTEL/fix_nh_intel.cpp
+++ b/src/INTEL/fix_nh_intel.cpp
@@ -22,6 +22,7 @@
 #include "domain.h"
 #include "error.h"
 #include "force.h"
+#include "intel_preprocess.h"
 #include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
@@ -100,6 +101,7 @@ void FixNHIntel::remap()
     #pragma vector aligned
     #endif
     for (int i = 0; i < nlocal; i++) {
+      i = IP_PRE_dword_index(i);
       const double d0 = x[i].x - b0;
       const double d1 = x[i].y - b1;
       const double d2 = x[i].z - b2;
@@ -118,6 +120,7 @@ void FixNHIntel::remap()
     #endif
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & dilate_group_bit) {
+        i = IP_PRE_dword_index(i);
         const double d0 = x[i].x - b0;
         const double d1 = x[i].y - b1;
         const double d2 = x[i].z - b2;
@@ -287,6 +290,7 @@ void FixNHIntel::remap()
     #pragma vector aligned
     #endif
     for (int i = 0; i < nlocal; i++) {
+      i = IP_PRE_dword_index(i);
       x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
       x[i].y = h1*x[i].y + h3*x[i].z + nb1;
       x[i].z = h2*x[i].z + nb2;
@@ -302,6 +306,7 @@ void FixNHIntel::remap()
     #endif
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & dilate_group_bit) {
+        i = IP_PRE_dword_index(i);
         x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
         x[i].y = h1*x[i].y + h3*x[i].z + nb1;
         x[i].z = h2*x[i].z + nb2;
@@ -432,6 +437,7 @@ void FixNHIntel::nh_v_press()
     #pragma vector aligned
     #endif
     for (int i = 0; i < nlocal; i++) {
+      i = IP_PRE_dword_index(i);
       v[i].x *= f0;
       v[i].y *= f1;
       v[i].z *= f2;
@@ -447,6 +453,7 @@ void FixNHIntel::nh_v_press()
     #endif
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
+        i = IP_PRE_dword_index(i);
         v[i].x *= f0;
         v[i].y *= f1;
         v[i].z *= f2;
diff --git a/src/INTEL/improper_cvff_intel.cpp b/src/INTEL/improper_cvff_intel.cpp
index bc3c1d06e8..ad0702f7b1 100644
--- a/src/INTEL/improper_cvff_intel.cpp
+++ b/src/INTEL/improper_cvff_intel.cpp
@@ -168,11 +168,11 @@ void ImproperCvffIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = improperlist[n].a;
-      const int i2 = improperlist[n].b;
-      const int i3 = improperlist[n].c;
-      const int i4 = improperlist[n].d;
-      const int type = improperlist[n].t;
+      const int i1 = IP_PRE_dword_index(improperlist[n].a);
+      const int i2 = IP_PRE_dword_index(improperlist[n].b);
+      const int i3 = IP_PRE_dword_index(improperlist[n].c);
+      const int i4 = IP_PRE_dword_index(improperlist[n].d);
+      const int type = IP_PRE_dword_index(improperlist[n].t);
 
       // geometry of 4-body
 
diff --git a/src/INTEL/improper_harmonic_intel.cpp b/src/INTEL/improper_harmonic_intel.cpp
index 615d080446..d71c0bdf10 100644
--- a/src/INTEL/improper_harmonic_intel.cpp
+++ b/src/INTEL/improper_harmonic_intel.cpp
@@ -170,11 +170,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
     #else
     for (int n = nfrom; n < nto; n += npl) {
     #endif
-      const int i1 = improperlist[n].a;
-      const int i2 = improperlist[n].b;
-      const int i3 = improperlist[n].c;
-      const int i4 = improperlist[n].d;
-      const int type = improperlist[n].t;
+      const int i1 = IP_PRE_dword_index(improperlist[n].a);
+      const int i2 = IP_PRE_dword_index(improperlist[n].b);
+      const int i3 = IP_PRE_dword_index(improperlist[n].c);
+      const int i4 = IP_PRE_dword_index(improperlist[n].d);
+      const int type = IP_PRE_dword_index(improperlist[n].t);
 
       // geometry of 4-body
 
diff --git a/src/INTEL/intel_preprocess.h b/src/INTEL/intel_preprocess.h
index c7bd60b00b..7ff8f7d099 100644
--- a/src/INTEL/intel_preprocess.h
+++ b/src/INTEL/intel_preprocess.h
@@ -16,10 +16,16 @@
    Contributing author: W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */
 
+#include "lmptype.h"
+
 #ifdef __INTEL_LLVM_COMPILER
 #define USE_OMP_SIMD
 #define __INTEL_COMPILER __INTEL_LLVM_COMPILER
 #define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
+// Indicate to vectorizer that it is safe to use dword indexed gather
+#define IP_PRE_dword_index(i) ((i) & NEIGHMASK)
+#else // no masking here; parenthesized so both variants parse the same way
+#define IP_PRE_dword_index(i) (i)
 #endif
 
 #ifdef __INTEL_COMPILER
diff --git a/src/INTEL/npair_full_bin_ghost_intel.cpp b/src/INTEL/npair_full_bin_ghost_intel.cpp
index b0c052d35b..e6ca266933 100644
--- a/src/INTEL/npair_full_bin_ghost_intel.cpp
+++ b/src/INTEL/npair_full_bin_ghost_intel.cpp
@@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
           #pragma vector aligned
           #endif
           for (int u = 0; u < ncount; u++) {
-            const int j = tj[u];
+            const int j = IP_PRE_dword_index(tj[u]);
             tx[u] = x[j].x;
             ty[u] = x[j].y;
             tz[u] = x[j].z;
diff --git a/src/INTEL/npair_intel.cpp b/src/INTEL/npair_intel.cpp
index f6783fce40..cfbea387a0 100644
--- a/src/INTEL/npair_intel.cpp
+++ b/src/INTEL/npair_intel.cpp
@@ -359,7 +359,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
           #pragma vector aligned
           #endif
           for (int u = 0; u < ncount; u++) {
-            const int j = tj[u];
+            const int j = IP_PRE_dword_index(tj[u]);
             tx[u] = x[j].x;
             ty[u] = x[j].y;
             tz[u] = x[j].z;
@@ -387,7 +387,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
 #endif
             #endif
             for (int jj = bstart; jj < bend; jj++) {
-              const int j = binpacked[jj];
+              const int j = IP_PRE_dword_index(binpacked[jj]);
               itj[icount] = j;
               itx[icount] = x[j].x;
               ity[icount] = x[j].y;
diff --git a/src/INTEL/pair_buck_coul_cut_intel.cpp b/src/INTEL/pair_buck_coul_cut_intel.cpp
index 57608a090c..62d6d02952 100644
--- a/src/INTEL/pair_buck_coul_cut_intel.cpp
+++ b/src/INTEL/pair_buck_coul_cut_intel.cpp
@@ -265,7 +265,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
-          const int jtype = x[j].w;
+          const int jtype = IP_PRE_dword_index(x[j].w);
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
           const flt_t r = sqrt(rsq);
           const flt_t r2inv = (flt_t)1.0 / rsq;
diff --git a/src/INTEL/pair_buck_coul_long_intel.cpp b/src/INTEL/pair_buck_coul_long_intel.cpp
index b0638ff68d..1425317a0a 100644
--- a/src/INTEL/pair_buck_coul_long_intel.cpp
+++ b/src/INTEL/pair_buck_coul_long_intel.cpp
@@ -289,7 +289,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
-          const int jtype = x[j].w;
+          const int jtype = IP_PRE_dword_index(x[j].w);
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq < c_forcei[jtype].cutsq) {
diff --git a/src/INTEL/pair_buck_intel.cpp b/src/INTEL/pair_buck_intel.cpp
index 7e3da8daf8..46ea291420 100644
--- a/src/INTEL/pair_buck_intel.cpp
+++ b/src/INTEL/pair_buck_intel.cpp
@@ -253,7 +253,7 @@ void PairBuckIntel::eval(const int offload, const int vflag,
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
-          const int jtype = x[j].w;
+          const int jtype = IP_PRE_dword_index(x[j].w);
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
           const flt_t r = sqrt(rsq);
           const flt_t r2inv = (flt_t)1.0 / rsq;
diff --git a/src/INTEL/pair_dpd_intel.cpp b/src/INTEL/pair_dpd_intel.cpp
index 06801b0ce0..763bd2fb57 100644
--- a/src/INTEL/pair_dpd_intel.cpp
+++ b/src/INTEL/pair_dpd_intel.cpp
@@ -312,13 +312,13 @@ void PairDPDIntel::eval(const int offload, const int vflag,
             sbindex = jlist[jj] >> SBBITS & 3;
             j = jlist[jj] & NEIGHMASK;
           } else
-            j = jlist[jj];
+            j = IP_PRE_dword_index(jlist[jj]);
 
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
           if (!ONETYPE) {
-            jtype = x[j].w;
+            jtype = IP_PRE_dword_index(x[j].w);
             icut = parami[jtype].icut;
           }
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
diff --git a/src/INTEL/pair_eam_intel.cpp b/src/INTEL/pair_eam_intel.cpp
index 2a491e66c9..0fff4f0632 100644
--- a/src/INTEL/pair_eam_intel.cpp
+++ b/src/INTEL/pair_eam_intel.cpp
@@ -347,14 +347,15 @@ void PairEAMIntel::eval(const int offload, const int vflag,
           p = MIN(p,(flt_t)1.0);
           if (!ONETYPE)
             rhor_joff = rhor_ioff + jtype * jstride;
-          const int joff = rhor_joff + m;
+          const int joff = IP_PRE_dword_index(rhor_joff + m);
           flt_t ra;
           ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
                 rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
           rhoi += ra;
           if (NEWTON_PAIR) {
             if (!ONETYPE) {
-              const int ioff = jtype * istride + itype * jstride + m;
+              const int ioff = IP_PRE_dword_index(jtype * istride + itype *
+                                                  jstride + m);
               ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
                     rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
             }
@@ -439,7 +440,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       #pragma vector aligned
       #endif
       for (int ii = iifrom; ii < iito; ++ii) {
-        const int i = ilist[ii];
+        const int i = IP_PRE_dword_index(ilist[ii]);
         int itype;
         if (!ONETYPE) itype = x[i].w;
         flt_t p = rho[i]*frdrho + (flt_t)1.0;
@@ -448,7 +449,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
         p -= m;
         p = MIN(p,(flt_t)1.0);
         if (!ONETYPE) frho_ioff = itype * fstride;
-        const int ioff = frho_ioff + m;
+        const int ioff = IP_PRE_dword_index(frho_ioff + m);
         fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p +
           frho_spline_f[ioff].c;
         if (EFLAG) {
@@ -553,13 +554,14 @@ void PairEAMIntel::eval(const int offload, const int vflag,
           p = MIN(p,(flt_t)1.0);
           if (!ONETYPE)
             rhor_joff = rhor_ioff + jtype * jstride;
-          const int joff = rhor_joff + m;
+          const int joff = IP_PRE_dword_index(rhor_joff + m);
           const flt_t rhojp = (rhor_spline_f[joff].a*p +
                                rhor_spline_f[joff].b)*p +
             rhor_spline_f[joff].c;
           flt_t rhoip;
           if (!ONETYPE) {
-            const int ioff = jtype * istride + itype * jstride + m;
+            const int ioff = IP_PRE_dword_index(jtype * istride +
+                                                itype * jstride + m);
             rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p +
               rhor_spline_f[ioff].c;
           } else
diff --git a/src/INTEL/pair_gayberne_intel.cpp b/src/INTEL/pair_gayberne_intel.cpp
index c3ce8a8d43..1c2cdf0d49 100644
--- a/src/INTEL/pair_gayberne_intel.cpp
+++ b/src/INTEL/pair_gayberne_intel.cpp
@@ -417,7 +417,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
         for (int jj = 0; jj < jnum; jj++) {
           int jm = jlist[jj];
           int j = jm & NEIGHMASK;
-          const int jtype = x[j].w;
+          const int jtype = IP_PRE_dword_index(x[j].w);
 
           if (ijci[jtype].form == ELLIPSE_ELLIPSE) {
             flt_t delx = x[j].x-xtmp;
@@ -473,7 +473,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
           const int sbindex = jlist_form[jj] >> SBBITS & 3;
           const int j = jlist_form[jj] & NEIGHMASK;
           flt_t factor_lj = special_lj[sbindex];
-          const int jtype = jtype_form[jj];
+          const int jtype = IP_PRE_dword_index(jtype_form[jj]);
           const flt_t sigma = ijci[jtype].sigma;
           const flt_t epsilon = ijci[jtype].epsilon;
           const flt_t shape2_0 = ic[jtype].shape2[0];
diff --git a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp
index e6c528d3a4..9cc76a277c 100644
--- a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp
+++ b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp
@@ -318,7 +318,7 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
           #ifdef INTEL_VMASK
           if (rsq < cut_ljsq) {
           #endif
-            const int jtype = tjtype[jj];
+            const int jtype = IP_PRE_dword_index(tjtype[jj]);
             flt_t r6inv = r2inv * r2inv * r2inv;
             forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
             if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
diff --git a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp
index 3952a7da95..e30c1dec32 100644
--- a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp
+++ b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp
@@ -324,7 +324,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
 
           const int j = tj[jj] & NEIGHMASK;
           const int sbindex = tj[jj] >> SBBITS & 3;
-          const int jtype = tjtype[jj];
+          const int jtype = IP_PRE_dword_index(tjtype[jj]);
           const flt_t rsq = trsq[jj];
           const flt_t r2inv = (flt_t)1.0 / rsq;
 
diff --git a/src/INTEL/pair_lj_cut_coul_long_intel.cpp b/src/INTEL/pair_lj_cut_coul_long_intel.cpp
index 098036c2f1..17e6b6361d 100644
--- a/src/INTEL/pair_lj_cut_coul_long_intel.cpp
+++ b/src/INTEL/pair_lj_cut_coul_long_intel.cpp
@@ -287,7 +287,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
-          const int jtype = x[j].w;
+          const int jtype = IP_PRE_dword_index(x[j].w);
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq < c_forcei[jtype].cutsq) {
@@ -316,8 +316,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
           forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
 
           const int j = tj[jj] & NEIGHMASK;
-          const int sbindex = tj[jj] >> SBBITS & 3;
-          const int jtype = tjtype[jj];
+          const int sbindex = IP_PRE_dword_index(tj[jj] >> SBBITS & 3);
+          const int jtype = IP_PRE_dword_index(tjtype[jj]);
           const flt_t rsq = trsq[jj];
           const flt_t r2inv = (flt_t)1.0 / rsq;
 
diff --git a/src/INTEL/pair_lj_cut_intel.cpp b/src/INTEL/pair_lj_cut_intel.cpp
index 1f3169ab26..c1d9a947f6 100644
--- a/src/INTEL/pair_lj_cut_intel.cpp
+++ b/src/INTEL/pair_lj_cut_intel.cpp
@@ -262,13 +262,13 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
             sbindex = jlist[jj] >> SBBITS & 3;
             j = jlist[jj] & NEIGHMASK;
           } else
-            j = jlist[jj];
+            j = IP_PRE_dword_index(jlist[jj]);
 
           const flt_t delx = xtmp - x[j].x;
           const flt_t dely = ytmp - x[j].y;
           const flt_t delz = ztmp - x[j].z;
           if (!ONETYPE) {
-            jtype = x[j].w;
+            jtype = IP_PRE_dword_index(x[j].w);
             cutsq = ljc12oi[jtype].cutsq;
           }
           const flt_t rsq = delx * delx + dely * dely + delz * delz;
diff --git a/src/INTEL/pair_sw_intel.cpp b/src/INTEL/pair_sw_intel.cpp
index 37fe19260a..6e239afc7d 100644
--- a/src/INTEL/pair_sw_intel.cpp
+++ b/src/INTEL/pair_sw_intel.cpp
@@ -332,7 +332,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
           int jtype, ijtype;
           if (!ONETYPE) {
             jtype = x[j].w;
-            ijtype = itype_offset + jtype;
+            ijtype = IP_PRE_dword_index(itype_offset + jtype);
             cutsq = p2[ijtype].cutsq;
           }
           const flt_t rsq1 = delx * delx + dely * dely + delz * delz;
@@ -378,7 +378,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
           if (EFLAG) fjtmp = (acc_t)0.0;
           int ijtype;
 
-          if (!ONETYPE) ijtype = tjtype[jj] + itype_offset;
+          if (!ONETYPE) ijtype = IP_PRE_dword_index(tjtype[jj] + itype_offset);
           const flt_t rsq1 = trsq[jj];
 
           const flt_t rinvsq1 = (flt_t)1.0 / rsq1;
@@ -459,8 +459,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
             int iktype, ijktype;
             if (!ONETYPE) {
               iktype = tjtype[kk];
-              ijktype = ijkoff + iktype;
-              iktype += itype_offset;
+              ijktype = IP_PRE_dword_index(ijkoff + iktype);
+              iktype = IP_PRE_dword_index(iktype + itype_offset);
               cut = p2[iktype].cut;
               sigma_gamma = p2[iktype].sigma_gamma;
               costheta = p3[ijktype].costheta;
@@ -520,7 +520,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
               }
             }
           } // for kk
-          const int j = tj[jj];
+          const int j = IP_PRE_dword_index(tj[jj]);
           f[j].x += fjxtmp;
           f[j].y += fjytmp;
           f[j].z += fjztmp;
diff --git a/src/INTEL/pppm_intel.cpp b/src/INTEL/pppm_intel.cpp
index 72c5791f75..fc25d64847 100644
--- a/src/INTEL/pppm_intel.cpp
+++ b/src/INTEL/pppm_intel.cpp
@@ -403,7 +403,6 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
       // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
       // current particle coord can be outside global and local box
       // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
       int nx = static_cast<int> ((x[i].x-lo0)*xi+fshift) - OFFSET;
       int ny = static_cast<int> ((x[i].y-lo1)*yi+fshift) - OFFSET;
       int nz = static_cast<int> ((x[i].z-lo2)*zi+fshift) - OFFSET;
@@ -941,6 +940,7 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
 #endif
     #endif
     for (int i = ifrom; i < ito; i++) {
+      i = IP_PRE_dword_index(i);
       particle_ekx[i] *= hx_inv;
       particle_eky[i] *= hy_inv;
       particle_ekz[i] *= hz_inv;