diff --git a/src/INTEL/angle_charmm_intel.cpp b/src/INTEL/angle_charmm_intel.cpp index 1bdf2acb29..48e179c997 100644 --- a/src/INTEL/angle_charmm_intel.cpp +++ b/src/INTEL/angle_charmm_intel.cpp @@ -166,10 +166,10 @@ void AngleCharmmIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = anglelist[n].a; - const int i2 = anglelist[n].b; - const int i3 = anglelist[n].c; - const int type = anglelist[n].t; + const int i1 = IP_PRE_dword_index(anglelist[n].a); + const int i2 = IP_PRE_dword_index(anglelist[n].b); + const int i3 = IP_PRE_dword_index(anglelist[n].c); + const int type = IP_PRE_dword_index(anglelist[n].t); // 1st bond diff --git a/src/INTEL/angle_harmonic_intel.cpp b/src/INTEL/angle_harmonic_intel.cpp index 8d3d44f528..57dc152fbf 100644 --- a/src/INTEL/angle_harmonic_intel.cpp +++ b/src/INTEL/angle_harmonic_intel.cpp @@ -166,10 +166,10 @@ void AngleHarmonicIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = anglelist[n].a; - const int i2 = anglelist[n].b; - const int i3 = anglelist[n].c; - const int type = anglelist[n].t; + const int i1 = IP_PRE_dword_index(anglelist[n].a); + const int i2 = IP_PRE_dword_index(anglelist[n].b); + const int i3 = IP_PRE_dword_index(anglelist[n].c); + const int type = IP_PRE_dword_index(anglelist[n].t); // 1st bond diff --git a/src/INTEL/bond_fene_intel.cpp b/src/INTEL/bond_fene_intel.cpp index 5cd14dc230..12579372e9 100644 --- a/src/INTEL/bond_fene_intel.cpp +++ b/src/INTEL/bond_fene_intel.cpp @@ -163,9 +163,9 @@ void BondFENEIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = bondlist[n].a; - const int i2 = bondlist[n].b; - const int type = bondlist[n].t; + const int i1 = IP_PRE_dword_index(bondlist[n].a); + const int i2 = IP_PRE_dword_index(bondlist[n].b); + const int type = IP_PRE_dword_index(bondlist[n].t); const flt_t ir0sq = fc.fc[type].ir0sq; const flt_t k = fc.fc[type].k; diff --git 
a/src/INTEL/bond_harmonic_intel.cpp b/src/INTEL/bond_harmonic_intel.cpp index 4cb6ba3a8a..b3283f63bc 100644 --- a/src/INTEL/bond_harmonic_intel.cpp +++ b/src/INTEL/bond_harmonic_intel.cpp @@ -159,9 +159,9 @@ void BondHarmonicIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = bondlist[n].a; - const int i2 = bondlist[n].b; - const int type = bondlist[n].t; + const int i1 = IP_PRE_dword_index(bondlist[n].a); + const int i2 = IP_PRE_dword_index(bondlist[n].b); + const int type = IP_PRE_dword_index(bondlist[n].t); const flt_t delx = x[i1].x - x[i2].x; const flt_t dely = x[i1].y - x[i2].y; diff --git a/src/INTEL/dihedral_charmm_intel.cpp b/src/INTEL/dihedral_charmm_intel.cpp index b36e5ab505..2b3f8e2208 100644 --- a/src/INTEL/dihedral_charmm_intel.cpp +++ b/src/INTEL/dihedral_charmm_intel.cpp @@ -195,11 +195,11 @@ void DihedralCharmmIntel::eval(const int vflag, for (int n = nfrom; n < nto; n++) { #endif for (int n = nfrom; n < nto; n += npl) { - const int i1 = dihedrallist[n].a; - const int i2 = dihedrallist[n].b; - const int i3 = dihedrallist[n].c; - const int i4 = dihedrallist[n].d; - const int type = dihedrallist[n].t; + const int i1 = IP_PRE_dword_index(dihedrallist[n].a); + const int i2 = IP_PRE_dword_index(dihedrallist[n].b); + const int i3 = IP_PRE_dword_index(dihedrallist[n].c); + const int i4 = IP_PRE_dword_index(dihedrallist[n].d); + const int type = IP_PRE_dword_index(dihedrallist[n].t); // 1st bond diff --git a/src/INTEL/dihedral_harmonic_intel.cpp b/src/INTEL/dihedral_harmonic_intel.cpp index 9b504be3ca..831b3a94f4 100644 --- a/src/INTEL/dihedral_harmonic_intel.cpp +++ b/src/INTEL/dihedral_harmonic_intel.cpp @@ -163,11 +163,11 @@ void DihedralHarmonicIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = dihedrallist[n].a; - const int i2 = dihedrallist[n].b; - const int i3 = dihedrallist[n].c; - const int i4 = dihedrallist[n].d; - const int type = dihedrallist[n].t; 
+ const int i1 = IP_PRE_dword_index(dihedrallist[n].a); + const int i2 = IP_PRE_dword_index(dihedrallist[n].b); + const int i3 = IP_PRE_dword_index(dihedrallist[n].c); + const int i4 = IP_PRE_dword_index(dihedrallist[n].d); + const int type = IP_PRE_dword_index(dihedrallist[n].t); // 1st bond diff --git a/src/INTEL/dihedral_opls_intel.cpp b/src/INTEL/dihedral_opls_intel.cpp index db0618abec..ff54923c34 100644 --- a/src/INTEL/dihedral_opls_intel.cpp +++ b/src/INTEL/dihedral_opls_intel.cpp @@ -167,11 +167,11 @@ void DihedralOPLSIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = dihedrallist[n].a; - const int i2 = dihedrallist[n].b; - const int i3 = dihedrallist[n].c; - const int i4 = dihedrallist[n].d; - const int type = dihedrallist[n].t; + const int i1 = IP_PRE_dword_index(dihedrallist[n].a); + const int i2 = IP_PRE_dword_index(dihedrallist[n].b); + const int i3 = IP_PRE_dword_index(dihedrallist[n].c); + const int i4 = IP_PRE_dword_index(dihedrallist[n].d); + const int type = IP_PRE_dword_index(dihedrallist[n].t); // 1st bond diff --git a/src/INTEL/fix_nh_intel.cpp b/src/INTEL/fix_nh_intel.cpp index a6f2ec0478..2f10b63922 100644 --- a/src/INTEL/fix_nh_intel.cpp +++ b/src/INTEL/fix_nh_intel.cpp @@ -22,6 +22,7 @@ #include "domain.h" #include "error.h" #include "force.h" +#include "intel_preprocess.h" #include "memory.h" #include "modify.h" #include "neighbor.h" @@ -100,6 +101,7 @@ void FixNHIntel::remap() #pragma vector aligned #endif for (int i = 0; i < nlocal; i++) { + i = IP_PRE_dword_index(i); const double d0 = x[i].x - b0; const double d1 = x[i].y - b1; const double d2 = x[i].z - b2; @@ -118,6 +120,7 @@ void FixNHIntel::remap() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & dilate_group_bit) { + i = IP_PRE_dword_index(i); const double d0 = x[i].x - b0; const double d1 = x[i].y - b1; const double d2 = x[i].z - b2; @@ -287,6 +290,7 @@ void FixNHIntel::remap() #pragma vector aligned #endif for (int i = 0; i 
< nlocal; i++) { + i = IP_PRE_dword_index(i); x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0; x[i].y = h1*x[i].y + h3*x[i].z + nb1; x[i].z = h2*x[i].z + nb2; @@ -302,6 +306,7 @@ void FixNHIntel::remap() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & dilate_group_bit) { + i = IP_PRE_dword_index(i); x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0; x[i].y = h1*x[i].y + h3*x[i].z + nb1; x[i].z = h2*x[i].z + nb2; @@ -432,6 +437,7 @@ void FixNHIntel::nh_v_press() #pragma vector aligned #endif for (int i = 0; i < nlocal; i++) { + i = IP_PRE_dword_index(i); v[i].x *= f0; v[i].y *= f1; v[i].z *= f2; @@ -447,6 +453,7 @@ void FixNHIntel::nh_v_press() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { + i = IP_PRE_dword_index(i); v[i].x *= f0; v[i].y *= f1; v[i].z *= f2; diff --git a/src/INTEL/improper_cvff_intel.cpp b/src/INTEL/improper_cvff_intel.cpp index bc3c1d06e8..ad0702f7b1 100644 --- a/src/INTEL/improper_cvff_intel.cpp +++ b/src/INTEL/improper_cvff_intel.cpp @@ -168,11 +168,11 @@ void ImproperCvffIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = improperlist[n].a; - const int i2 = improperlist[n].b; - const int i3 = improperlist[n].c; - const int i4 = improperlist[n].d; - const int type = improperlist[n].t; + const int i1 = IP_PRE_dword_index(improperlist[n].a); + const int i2 = IP_PRE_dword_index(improperlist[n].b); + const int i3 = IP_PRE_dword_index(improperlist[n].c); + const int i4 = IP_PRE_dword_index(improperlist[n].d); + const int type = IP_PRE_dword_index(improperlist[n].t); // geometry of 4-body diff --git a/src/INTEL/improper_harmonic_intel.cpp b/src/INTEL/improper_harmonic_intel.cpp index 615d080446..d71c0bdf10 100644 --- a/src/INTEL/improper_harmonic_intel.cpp +++ b/src/INTEL/improper_harmonic_intel.cpp @@ -170,11 +170,11 @@ void ImproperHarmonicIntel::eval(const int vflag, #else for (int n = nfrom; n < nto; n += npl) { #endif - const int i1 = improperlist[n].a; - const int i2 = 
improperlist[n].b; - const int i3 = improperlist[n].c; - const int i4 = improperlist[n].d; - const int type = improperlist[n].t; + const int i1 = IP_PRE_dword_index(improperlist[n].a); + const int i2 = IP_PRE_dword_index(improperlist[n].b); + const int i3 = IP_PRE_dword_index(improperlist[n].c); + const int i4 = IP_PRE_dword_index(improperlist[n].d); + const int type = IP_PRE_dword_index(improperlist[n].t); // geometry of 4-body diff --git a/src/INTEL/intel_preprocess.h b/src/INTEL/intel_preprocess.h index c7bd60b00b..7ff8f7d099 100644 --- a/src/INTEL/intel_preprocess.h +++ b/src/INTEL/intel_preprocess.h @@ -16,10 +16,16 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "lmptype.h" + #ifdef __INTEL_LLVM_COMPILER #define USE_OMP_SIMD #define __INTEL_COMPILER __INTEL_LLVM_COMPILER #define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER +// Indicate to vectorizer that it is safe to use dword indexed gather +#define IP_PRE_dword_index(i) ((i) & NEIGHMASK) +#else +#define IP_PRE_dword_index(i) (i) #endif #ifdef __INTEL_COMPILER diff --git a/src/INTEL/npair_full_bin_ghost_intel.cpp b/src/INTEL/npair_full_bin_ghost_intel.cpp index b0c052d35b..e6ca266933 100644 --- a/src/INTEL/npair_full_bin_ghost_intel.cpp +++ b/src/INTEL/npair_full_bin_ghost_intel.cpp @@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, #pragma vector aligned #endif for (int u = 0; u < ncount; u++) { - const int j = tj[u]; + const int j = IP_PRE_dword_index(tj[u]); tx[u] = x[j].x; ty[u] = x[j].y; tz[u] = x[j].z; diff --git a/src/INTEL/npair_intel.cpp b/src/INTEL/npair_intel.cpp index f6783fce40..cfbea387a0 100644 --- a/src/INTEL/npair_intel.cpp +++ b/src/INTEL/npair_intel.cpp @@ -359,7 +359,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, #pragma vector aligned #endif for (int u = 0; u < ncount; u++) { - const int j = 
IP_PRE_dword_index(tj[u]); tx[u] = x[j].x; ty[u] = x[j].y; tz[u] = x[j].z; @@ -387,7 +387,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, #endif #endif for (int jj = bstart; jj < bend; jj++) { - const int j = binpacked[jj]; + const int j = IP_PRE_dword_index(binpacked[jj]); itj[icount] = j; itx[icount] = x[j].x; ity[icount] = x[j].y; diff --git a/src/INTEL/pair_buck_coul_cut_intel.cpp b/src/INTEL/pair_buck_coul_cut_intel.cpp index 57608a090c..62d6d02952 100644 --- a/src/INTEL/pair_buck_coul_cut_intel.cpp +++ b/src/INTEL/pair_buck_coul_cut_intel.cpp @@ -265,7 +265,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; + const int jtype = IP_PRE_dword_index(x[j].w); const flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r = sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; diff --git a/src/INTEL/pair_buck_coul_long_intel.cpp b/src/INTEL/pair_buck_coul_long_intel.cpp index b0638ff68d..1425317a0a 100644 --- a/src/INTEL/pair_buck_coul_long_intel.cpp +++ b/src/INTEL/pair_buck_coul_long_intel.cpp @@ -289,7 +289,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; + const int jtype = IP_PRE_dword_index(x[j].w); const flt_t rsq = delx * delx + dely * dely + delz * delz; if (rsq < c_forcei[jtype].cutsq) { diff --git a/src/INTEL/pair_buck_intel.cpp b/src/INTEL/pair_buck_intel.cpp index 7e3da8daf8..46ea291420 100644 --- a/src/INTEL/pair_buck_intel.cpp +++ b/src/INTEL/pair_buck_intel.cpp @@ -253,7 +253,7 @@ void PairBuckIntel::eval(const int offload, const int vflag, const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; + const int jtype = IP_PRE_dword_index(x[j].w); const 
flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r = sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; diff --git a/src/INTEL/pair_dpd_intel.cpp b/src/INTEL/pair_dpd_intel.cpp index 06801b0ce0..763bd2fb57 100644 --- a/src/INTEL/pair_dpd_intel.cpp +++ b/src/INTEL/pair_dpd_intel.cpp @@ -312,13 +312,13 @@ void PairDPDIntel::eval(const int offload, const int vflag, sbindex = jlist[jj] >> SBBITS & 3; j = jlist[jj] & NEIGHMASK; } else - j = jlist[jj]; + j = IP_PRE_dword_index(jlist[jj]); const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; if (!ONETYPE) { - jtype = x[j].w; + jtype = IP_PRE_dword_index(x[j].w); icut = parami[jtype].icut; } const flt_t rsq = delx * delx + dely * dely + delz * delz; diff --git a/src/INTEL/pair_eam_intel.cpp b/src/INTEL/pair_eam_intel.cpp index 2a491e66c9..0fff4f0632 100644 --- a/src/INTEL/pair_eam_intel.cpp +++ b/src/INTEL/pair_eam_intel.cpp @@ -347,14 +347,15 @@ void PairEAMIntel::eval(const int offload, const int vflag, p = MIN(p,(flt_t)1.0); if (!ONETYPE) rhor_joff = rhor_ioff + jtype * jstride; - const int joff = rhor_joff + m; + const int joff = IP_PRE_dword_index(rhor_joff + m); flt_t ra; ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p + rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d; rhoi += ra; if (NEWTON_PAIR) { if (!ONETYPE) { - const int ioff = jtype * istride + itype * jstride + m; + const int ioff = IP_PRE_dword_index(jtype * istride + itype * + jstride + m); ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p + rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d; } @@ -439,7 +440,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, #pragma vector aligned #endif for (int ii = iifrom; ii < iito; ++ii) { - const int i = ilist[ii]; + const int i = IP_PRE_dword_index(ilist[ii]); int itype; if (!ONETYPE) itype = x[i].w; flt_t p = rho[i]*frdrho + (flt_t)1.0; @@ -448,7 +449,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, p 
-= m; p = MIN(p,(flt_t)1.0); if (!ONETYPE) frho_ioff = itype * fstride; - const int ioff = frho_ioff + m; + const int ioff = IP_PRE_dword_index(frho_ioff + m); fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p + frho_spline_f[ioff].c; if (EFLAG) { @@ -553,13 +554,14 @@ void PairEAMIntel::eval(const int offload, const int vflag, p = MIN(p,(flt_t)1.0); if (!ONETYPE) rhor_joff = rhor_ioff + jtype * jstride; - const int joff = rhor_joff + m; + const int joff = IP_PRE_dword_index(rhor_joff + m); const flt_t rhojp = (rhor_spline_f[joff].a*p + rhor_spline_f[joff].b)*p + rhor_spline_f[joff].c; flt_t rhoip; if (!ONETYPE) { - const int ioff = jtype * istride + itype * jstride + m; + const int ioff = IP_PRE_dword_index(jtype * istride + + itype * jstride + m); rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p + rhor_spline_f[ioff].c; } else diff --git a/src/INTEL/pair_gayberne_intel.cpp b/src/INTEL/pair_gayberne_intel.cpp index c3ce8a8d43..1c2cdf0d49 100644 --- a/src/INTEL/pair_gayberne_intel.cpp +++ b/src/INTEL/pair_gayberne_intel.cpp @@ -417,7 +417,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, for (int jj = 0; jj < jnum; jj++) { int jm = jlist[jj]; int j = jm & NEIGHMASK; - const int jtype = x[j].w; + const int jtype = IP_PRE_dword_index(x[j].w); if (ijci[jtype].form == ELLIPSE_ELLIPSE) { flt_t delx = x[j].x-xtmp; @@ -473,7 +473,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, const int sbindex = jlist_form[jj] >> SBBITS & 3; const int j = jlist_form[jj] & NEIGHMASK; flt_t factor_lj = special_lj[sbindex]; - const int jtype = jtype_form[jj]; + const int jtype = IP_PRE_dword_index(jtype_form[jj]); const flt_t sigma = ijci[jtype].sigma; const flt_t epsilon = ijci[jtype].epsilon; const flt_t shape2_0 = ic[jtype].shape2[0]; diff --git a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp index e6c528d3a4..9cc76a277c 100644 --- 
a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp +++ b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp @@ -318,7 +318,7 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag, #ifdef INTEL_VMASK if (rsq < cut_ljsq) { #endif - const int jtype = tjtype[jj]; + const int jtype = IP_PRE_dword_index(tjtype[jj]); flt_t r6inv = r2inv * r2inv * r2inv; forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y); if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w); diff --git a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp index 3952a7da95..e30c1dec32 100644 --- a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp +++ b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -324,7 +324,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, const int j = tj[jj] & NEIGHMASK; const int sbindex = tj[jj] >> SBBITS & 3; - const int jtype = tjtype[jj]; + const int jtype = IP_PRE_dword_index(tjtype[jj]); const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; diff --git a/src/INTEL/pair_lj_cut_coul_long_intel.cpp b/src/INTEL/pair_lj_cut_coul_long_intel.cpp index 098036c2f1..17e6b6361d 100644 --- a/src/INTEL/pair_lj_cut_coul_long_intel.cpp +++ b/src/INTEL/pair_lj_cut_coul_long_intel.cpp @@ -287,7 +287,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; + const int jtype = IP_PRE_dword_index(x[j].w); const flt_t rsq = delx * delx + dely * dely + delz * delz; if (rsq < c_forcei[jtype].cutsq) { @@ -316,8 +316,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; const int j = tj[jj] & NEIGHMASK; - const int sbindex = tj[jj] >> SBBITS & 3; - const int jtype = tjtype[jj]; + const int sbindex = IP_PRE_dword_index(tj[jj] >> SBBITS & 3); + const int jtype = 
IP_PRE_dword_index(tjtype[jj]); const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; diff --git a/src/INTEL/pair_lj_cut_intel.cpp b/src/INTEL/pair_lj_cut_intel.cpp index 1f3169ab26..c1d9a947f6 100644 --- a/src/INTEL/pair_lj_cut_intel.cpp +++ b/src/INTEL/pair_lj_cut_intel.cpp @@ -262,13 +262,13 @@ void PairLJCutIntel::eval(const int offload, const int vflag, sbindex = jlist[jj] >> SBBITS & 3; j = jlist[jj] & NEIGHMASK; } else - j = jlist[jj]; + j = IP_PRE_dword_index(jlist[jj]); const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; if (!ONETYPE) { - jtype = x[j].w; + jtype = IP_PRE_dword_index(x[j].w); cutsq = ljc12oi[jtype].cutsq; } const flt_t rsq = delx * delx + dely * dely + delz * delz; diff --git a/src/INTEL/pair_sw_intel.cpp b/src/INTEL/pair_sw_intel.cpp index 37fe19260a..6e239afc7d 100644 --- a/src/INTEL/pair_sw_intel.cpp +++ b/src/INTEL/pair_sw_intel.cpp @@ -332,7 +332,7 @@ void PairSWIntel::eval(const int offload, const int vflag, int jtype, ijtype; if (!ONETYPE) { jtype = x[j].w; - ijtype = itype_offset + jtype; + ijtype = IP_PRE_dword_index(itype_offset + jtype); cutsq = p2[ijtype].cutsq; } const flt_t rsq1 = delx * delx + dely * dely + delz * delz; @@ -378,7 +378,7 @@ void PairSWIntel::eval(const int offload, const int vflag, if (EFLAG) fjtmp = (acc_t)0.0; int ijtype; - if (!ONETYPE) ijtype = tjtype[jj] + itype_offset; + if (!ONETYPE) ijtype = IP_PRE_dword_index(tjtype[jj] + itype_offset); const flt_t rsq1 = trsq[jj]; const flt_t rinvsq1 = (flt_t)1.0 / rsq1; @@ -459,8 +459,8 @@ void PairSWIntel::eval(const int offload, const int vflag, int iktype, ijktype; if (!ONETYPE) { iktype = tjtype[kk]; - ijktype = ijkoff + iktype; - iktype += itype_offset; + ijktype = IP_PRE_dword_index(ijkoff + iktype); + iktype = IP_PRE_dword_index(iktype + itype_offset); cut = p2[iktype].cut; sigma_gamma = p2[iktype].sigma_gamma; costheta = p3[ijktype].costheta; @@ -520,7 +520,7 @@ void PairSWIntel::eval(const int 
offload, const int vflag, } } } // for kk - const int j = tj[jj]; + const int j = IP_PRE_dword_index(tj[jj]); f[j].x += fjxtmp; f[j].y += fjytmp; f[j].z += fjztmp; diff --git a/src/INTEL/pppm_intel.cpp b/src/INTEL/pppm_intel.cpp index 72c5791f75..fc25d64847 100644 --- a/src/INTEL/pppm_intel.cpp +++ b/src/INTEL/pppm_intel.cpp @@ -403,7 +403,6 @@ void PPPMIntel::particle_map(IntelBuffers *buffers) // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // current particle coord can be outside global and local box // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - int nx = static_cast ((x[i].x-lo0)*xi+fshift) - OFFSET; int ny = static_cast ((x[i].y-lo1)*yi+fshift) - OFFSET; int nz = static_cast ((x[i].z-lo2)*zi+fshift) - OFFSET; @@ -941,6 +940,7 @@ void PPPMIntel::fieldforce_ad(IntelBuffers *buffers) #endif #endif for (int i = ifrom; i < ito; i++) { + i = IP_PRE_dword_index(i); particle_ekx[i] *= hx_inv; particle_eky[i] *= hy_inv; particle_ekz[i] *= hz_inv;