Merge pull request #3410 from wmbrownIntel/icx_gather_opt
Changes to Intel Package files for better LLVM-based compiler support
This commit is contained in:
@ -166,10 +166,10 @@ void AngleCharmmIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = anglelist[n].a;
|
const int i1 = IP_PRE_dword_index(anglelist[n].a);
|
||||||
const int i2 = anglelist[n].b;
|
const int i2 = IP_PRE_dword_index(anglelist[n].b);
|
||||||
const int i3 = anglelist[n].c;
|
const int i3 = IP_PRE_dword_index(anglelist[n].c);
|
||||||
const int type = anglelist[n].t;
|
const int type = IP_PRE_dword_index(anglelist[n].t);
|
||||||
|
|
||||||
// 1st bond
|
// 1st bond
|
||||||
|
|
||||||
|
|||||||
@ -166,10 +166,10 @@ void AngleHarmonicIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = anglelist[n].a;
|
const int i1 = IP_PRE_dword_index(anglelist[n].a);
|
||||||
const int i2 = anglelist[n].b;
|
const int i2 = IP_PRE_dword_index(anglelist[n].b);
|
||||||
const int i3 = anglelist[n].c;
|
const int i3 = IP_PRE_dword_index(anglelist[n].c);
|
||||||
const int type = anglelist[n].t;
|
const int type = IP_PRE_dword_index(anglelist[n].t);
|
||||||
|
|
||||||
// 1st bond
|
// 1st bond
|
||||||
|
|
||||||
|
|||||||
@ -163,9 +163,9 @@ void BondFENEIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = bondlist[n].a;
|
const int i1 = IP_PRE_dword_index(bondlist[n].a);
|
||||||
const int i2 = bondlist[n].b;
|
const int i2 = IP_PRE_dword_index(bondlist[n].b);
|
||||||
const int type = bondlist[n].t;
|
const int type = IP_PRE_dword_index(bondlist[n].t);
|
||||||
|
|
||||||
const flt_t ir0sq = fc.fc[type].ir0sq;
|
const flt_t ir0sq = fc.fc[type].ir0sq;
|
||||||
const flt_t k = fc.fc[type].k;
|
const flt_t k = fc.fc[type].k;
|
||||||
|
|||||||
@ -159,9 +159,9 @@ void BondHarmonicIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = bondlist[n].a;
|
const int i1 = IP_PRE_dword_index(bondlist[n].a);
|
||||||
const int i2 = bondlist[n].b;
|
const int i2 = IP_PRE_dword_index(bondlist[n].b);
|
||||||
const int type = bondlist[n].t;
|
const int type = IP_PRE_dword_index(bondlist[n].t);
|
||||||
|
|
||||||
const flt_t delx = x[i1].x - x[i2].x;
|
const flt_t delx = x[i1].x - x[i2].x;
|
||||||
const flt_t dely = x[i1].y - x[i2].y;
|
const flt_t dely = x[i1].y - x[i2].y;
|
||||||
|
|||||||
@ -195,11 +195,11 @@ void DihedralCharmmIntel::eval(const int vflag,
|
|||||||
for (int n = nfrom; n < nto; n++) {
|
for (int n = nfrom; n < nto; n++) {
|
||||||
#endif
|
#endif
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
const int i1 = dihedrallist[n].a;
|
const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
|
||||||
const int i2 = dihedrallist[n].b;
|
const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
|
||||||
const int i3 = dihedrallist[n].c;
|
const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
|
||||||
const int i4 = dihedrallist[n].d;
|
const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
|
||||||
const int type = dihedrallist[n].t;
|
const int type = IP_PRE_dword_index(dihedrallist[n].t);
|
||||||
|
|
||||||
// 1st bond
|
// 1st bond
|
||||||
|
|
||||||
|
|||||||
@ -163,11 +163,11 @@ void DihedralHarmonicIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = dihedrallist[n].a;
|
const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
|
||||||
const int i2 = dihedrallist[n].b;
|
const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
|
||||||
const int i3 = dihedrallist[n].c;
|
const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
|
||||||
const int i4 = dihedrallist[n].d;
|
const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
|
||||||
const int type = dihedrallist[n].t;
|
const int type = IP_PRE_dword_index(dihedrallist[n].t);
|
||||||
|
|
||||||
// 1st bond
|
// 1st bond
|
||||||
|
|
||||||
|
|||||||
@ -167,11 +167,11 @@ void DihedralOPLSIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = dihedrallist[n].a;
|
const int i1 = IP_PRE_dword_index(dihedrallist[n].a);
|
||||||
const int i2 = dihedrallist[n].b;
|
const int i2 = IP_PRE_dword_index(dihedrallist[n].b);
|
||||||
const int i3 = dihedrallist[n].c;
|
const int i3 = IP_PRE_dword_index(dihedrallist[n].c);
|
||||||
const int i4 = dihedrallist[n].d;
|
const int i4 = IP_PRE_dword_index(dihedrallist[n].d);
|
||||||
const int type = dihedrallist[n].t;
|
const int type = IP_PRE_dword_index(dihedrallist[n].t);
|
||||||
|
|
||||||
// 1st bond
|
// 1st bond
|
||||||
|
|
||||||
|
|||||||
@ -22,6 +22,7 @@
|
|||||||
#include "domain.h"
|
#include "domain.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
|
#include "intel_preprocess.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "modify.h"
|
#include "modify.h"
|
||||||
#include "neighbor.h"
|
#include "neighbor.h"
|
||||||
@ -100,6 +101,7 @@ void FixNHIntel::remap()
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
const double d0 = x[i].x - b0;
|
const double d0 = x[i].x - b0;
|
||||||
const double d1 = x[i].y - b1;
|
const double d1 = x[i].y - b1;
|
||||||
const double d2 = x[i].z - b2;
|
const double d2 = x[i].z - b2;
|
||||||
@ -118,6 +120,7 @@ void FixNHIntel::remap()
|
|||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
if (mask[i] & dilate_group_bit) {
|
if (mask[i] & dilate_group_bit) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
const double d0 = x[i].x - b0;
|
const double d0 = x[i].x - b0;
|
||||||
const double d1 = x[i].y - b1;
|
const double d1 = x[i].y - b1;
|
||||||
const double d2 = x[i].z - b2;
|
const double d2 = x[i].z - b2;
|
||||||
@ -287,6 +290,7 @@ void FixNHIntel::remap()
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
|
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
|
||||||
x[i].y = h1*x[i].y + h3*x[i].z + nb1;
|
x[i].y = h1*x[i].y + h3*x[i].z + nb1;
|
||||||
x[i].z = h2*x[i].z + nb2;
|
x[i].z = h2*x[i].z + nb2;
|
||||||
@ -302,6 +306,7 @@ void FixNHIntel::remap()
|
|||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
if (mask[i] & dilate_group_bit) {
|
if (mask[i] & dilate_group_bit) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
|
x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
|
||||||
x[i].y = h1*x[i].y + h3*x[i].z + nb1;
|
x[i].y = h1*x[i].y + h3*x[i].z + nb1;
|
||||||
x[i].z = h2*x[i].z + nb2;
|
x[i].z = h2*x[i].z + nb2;
|
||||||
@ -432,6 +437,7 @@ void FixNHIntel::nh_v_press()
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
v[i].x *= f0;
|
v[i].x *= f0;
|
||||||
v[i].y *= f1;
|
v[i].y *= f1;
|
||||||
v[i].z *= f2;
|
v[i].z *= f2;
|
||||||
@ -447,6 +453,7 @@ void FixNHIntel::nh_v_press()
|
|||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
if (mask[i] & groupbit) {
|
if (mask[i] & groupbit) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
v[i].x *= f0;
|
v[i].x *= f0;
|
||||||
v[i].y *= f1;
|
v[i].y *= f1;
|
||||||
v[i].z *= f2;
|
v[i].z *= f2;
|
||||||
|
|||||||
@ -168,11 +168,11 @@ void ImproperCvffIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = improperlist[n].a;
|
const int i1 = IP_PRE_dword_index(improperlist[n].a);
|
||||||
const int i2 = improperlist[n].b;
|
const int i2 = IP_PRE_dword_index(improperlist[n].b);
|
||||||
const int i3 = improperlist[n].c;
|
const int i3 = IP_PRE_dword_index(improperlist[n].c);
|
||||||
const int i4 = improperlist[n].d;
|
const int i4 = IP_PRE_dword_index(improperlist[n].d);
|
||||||
const int type = improperlist[n].t;
|
const int type = IP_PRE_dword_index(improperlist[n].t);
|
||||||
|
|
||||||
// geometry of 4-body
|
// geometry of 4-body
|
||||||
|
|
||||||
|
|||||||
@ -170,11 +170,11 @@ void ImproperHarmonicIntel::eval(const int vflag,
|
|||||||
#else
|
#else
|
||||||
for (int n = nfrom; n < nto; n += npl) {
|
for (int n = nfrom; n < nto; n += npl) {
|
||||||
#endif
|
#endif
|
||||||
const int i1 = improperlist[n].a;
|
const int i1 = IP_PRE_dword_index(improperlist[n].a);
|
||||||
const int i2 = improperlist[n].b;
|
const int i2 = IP_PRE_dword_index(improperlist[n].b);
|
||||||
const int i3 = improperlist[n].c;
|
const int i3 = IP_PRE_dword_index(improperlist[n].c);
|
||||||
const int i4 = improperlist[n].d;
|
const int i4 = IP_PRE_dword_index(improperlist[n].d);
|
||||||
const int type = improperlist[n].t;
|
const int type = IP_PRE_dword_index(improperlist[n].t);
|
||||||
|
|
||||||
// geometry of 4-body
|
// geometry of 4-body
|
||||||
|
|
||||||
|
|||||||
@ -16,10 +16,16 @@
|
|||||||
Contributing author: W. Michael Brown (Intel)
|
Contributing author: W. Michael Brown (Intel)
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "lmptype.h"
|
||||||
|
|
||||||
#ifdef __INTEL_LLVM_COMPILER
|
#ifdef __INTEL_LLVM_COMPILER
|
||||||
#define USE_OMP_SIMD
|
#define USE_OMP_SIMD
|
||||||
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
|
#define __INTEL_COMPILER __INTEL_LLVM_COMPILER
|
||||||
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
|
#define __INTEL_COMPILER_BUILD_DATE __INTEL_LLVM_COMPILER
|
||||||
|
// Indicate to vectorizer that it is safe to use dword indexed gather
|
||||||
|
#define IP_PRE_dword_index(i) ((i) & NEIGHMASK)
|
||||||
|
#else
|
||||||
|
// Fallback branch: the index is passed through unchanged. The replacement
// list is parenthesized so that an expression argument keeps its grouping
// when the macro is expanded inside a larger expression.
#define IP_PRE_dword_index(i) (i)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __INTEL_COMPILER
|
#ifdef __INTEL_COMPILER
|
||||||
|
|||||||
@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int u = 0; u < ncount; u++) {
|
for (int u = 0; u < ncount; u++) {
|
||||||
const int j = tj[u];
|
const int j = IP_PRE_dword_index(tj[u]);
|
||||||
tx[u] = x[j].x;
|
tx[u] = x[j].x;
|
||||||
ty[u] = x[j].y;
|
ty[u] = x[j].y;
|
||||||
tz[u] = x[j].z;
|
tz[u] = x[j].z;
|
||||||
|
|||||||
@ -359,7 +359,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int u = 0; u < ncount; u++) {
|
for (int u = 0; u < ncount; u++) {
|
||||||
const int j = tj[u];
|
const int j = IP_PRE_dword_index(tj[u]);
|
||||||
tx[u] = x[j].x;
|
tx[u] = x[j].x;
|
||||||
ty[u] = x[j].y;
|
ty[u] = x[j].y;
|
||||||
tz[u] = x[j].z;
|
tz[u] = x[j].z;
|
||||||
@ -387,7 +387,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
for (int jj = bstart; jj < bend; jj++) {
|
for (int jj = bstart; jj < bend; jj++) {
|
||||||
const int j = binpacked[jj];
|
const int j = IP_PRE_dword_index(binpacked[jj]);
|
||||||
itj[icount] = j;
|
itj[icount] = j;
|
||||||
itx[icount] = x[j].x;
|
itx[icount] = x[j].x;
|
||||||
ity[icount] = x[j].y;
|
ity[icount] = x[j].y;
|
||||||
|
|||||||
@ -265,7 +265,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag,
|
|||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
const int jtype = x[j].w;
|
const int jtype = IP_PRE_dword_index(x[j].w);
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
const flt_t r = sqrt(rsq);
|
const flt_t r = sqrt(rsq);
|
||||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|||||||
@ -289,7 +289,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
|||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
const int jtype = x[j].w;
|
const int jtype = IP_PRE_dword_index(x[j].w);
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
if (rsq < c_forcei[jtype].cutsq) {
|
if (rsq < c_forcei[jtype].cutsq) {
|
||||||
|
|||||||
@ -253,7 +253,7 @@ void PairBuckIntel::eval(const int offload, const int vflag,
|
|||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
const int jtype = x[j].w;
|
const int jtype = IP_PRE_dword_index(x[j].w);
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
const flt_t r = sqrt(rsq);
|
const flt_t r = sqrt(rsq);
|
||||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|||||||
@ -312,13 +312,13 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
|||||||
sbindex = jlist[jj] >> SBBITS & 3;
|
sbindex = jlist[jj] >> SBBITS & 3;
|
||||||
j = jlist[jj] & NEIGHMASK;
|
j = jlist[jj] & NEIGHMASK;
|
||||||
} else
|
} else
|
||||||
j = jlist[jj];
|
j = IP_PRE_dword_index(jlist[jj]);
|
||||||
|
|
||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
jtype = x[j].w;
|
jtype = IP_PRE_dword_index(x[j].w);
|
||||||
icut = parami[jtype].icut;
|
icut = parami[jtype].icut;
|
||||||
}
|
}
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|||||||
@ -347,14 +347,15 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
|||||||
p = MIN(p,(flt_t)1.0);
|
p = MIN(p,(flt_t)1.0);
|
||||||
if (!ONETYPE)
|
if (!ONETYPE)
|
||||||
rhor_joff = rhor_ioff + jtype * jstride;
|
rhor_joff = rhor_ioff + jtype * jstride;
|
||||||
const int joff = rhor_joff + m;
|
const int joff = IP_PRE_dword_index(rhor_joff + m);
|
||||||
flt_t ra;
|
flt_t ra;
|
||||||
ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
|
ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
|
||||||
rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
|
rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
|
||||||
rhoi += ra;
|
rhoi += ra;
|
||||||
if (NEWTON_PAIR) {
|
if (NEWTON_PAIR) {
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
const int ioff = jtype * istride + itype * jstride + m;
|
const int ioff = IP_PRE_dword_index(jtype * istride + itype *
|
||||||
|
jstride + m);
|
||||||
ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
|
ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
|
||||||
rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
|
rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
|
||||||
}
|
}
|
||||||
@ -439,7 +440,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
|||||||
#pragma vector aligned
|
#pragma vector aligned
|
||||||
#endif
|
#endif
|
||||||
for (int ii = iifrom; ii < iito; ++ii) {
|
for (int ii = iifrom; ii < iito; ++ii) {
|
||||||
const int i = ilist[ii];
|
const int i = IP_PRE_dword_index(ilist[ii]);
|
||||||
int itype;
|
int itype;
|
||||||
if (!ONETYPE) itype = x[i].w;
|
if (!ONETYPE) itype = x[i].w;
|
||||||
flt_t p = rho[i]*frdrho + (flt_t)1.0;
|
flt_t p = rho[i]*frdrho + (flt_t)1.0;
|
||||||
@ -448,7 +449,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
|||||||
p -= m;
|
p -= m;
|
||||||
p = MIN(p,(flt_t)1.0);
|
p = MIN(p,(flt_t)1.0);
|
||||||
if (!ONETYPE) frho_ioff = itype * fstride;
|
if (!ONETYPE) frho_ioff = itype * fstride;
|
||||||
const int ioff = frho_ioff + m;
|
const int ioff = IP_PRE_dword_index(frho_ioff + m);
|
||||||
fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p +
|
fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p +
|
||||||
frho_spline_f[ioff].c;
|
frho_spline_f[ioff].c;
|
||||||
if (EFLAG) {
|
if (EFLAG) {
|
||||||
@ -553,13 +554,14 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
|||||||
p = MIN(p,(flt_t)1.0);
|
p = MIN(p,(flt_t)1.0);
|
||||||
if (!ONETYPE)
|
if (!ONETYPE)
|
||||||
rhor_joff = rhor_ioff + jtype * jstride;
|
rhor_joff = rhor_ioff + jtype * jstride;
|
||||||
const int joff = rhor_joff + m;
|
const int joff = IP_PRE_dword_index(rhor_joff + m);
|
||||||
const flt_t rhojp = (rhor_spline_f[joff].a*p +
|
const flt_t rhojp = (rhor_spline_f[joff].a*p +
|
||||||
rhor_spline_f[joff].b)*p +
|
rhor_spline_f[joff].b)*p +
|
||||||
rhor_spline_f[joff].c;
|
rhor_spline_f[joff].c;
|
||||||
flt_t rhoip;
|
flt_t rhoip;
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
const int ioff = jtype * istride + itype * jstride + m;
|
const int ioff = IP_PRE_dword_index(jtype * istride +
|
||||||
|
itype * jstride + m);
|
||||||
rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p +
|
rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p +
|
||||||
rhor_spline_f[ioff].c;
|
rhor_spline_f[ioff].c;
|
||||||
} else
|
} else
|
||||||
|
|||||||
@ -417,7 +417,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||||||
for (int jj = 0; jj < jnum; jj++) {
|
for (int jj = 0; jj < jnum; jj++) {
|
||||||
int jm = jlist[jj];
|
int jm = jlist[jj];
|
||||||
int j = jm & NEIGHMASK;
|
int j = jm & NEIGHMASK;
|
||||||
const int jtype = x[j].w;
|
const int jtype = IP_PRE_dword_index(x[j].w);
|
||||||
|
|
||||||
if (ijci[jtype].form == ELLIPSE_ELLIPSE) {
|
if (ijci[jtype].form == ELLIPSE_ELLIPSE) {
|
||||||
flt_t delx = x[j].x-xtmp;
|
flt_t delx = x[j].x-xtmp;
|
||||||
@ -473,7 +473,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||||||
const int sbindex = jlist_form[jj] >> SBBITS & 3;
|
const int sbindex = jlist_form[jj] >> SBBITS & 3;
|
||||||
const int j = jlist_form[jj] & NEIGHMASK;
|
const int j = jlist_form[jj] & NEIGHMASK;
|
||||||
flt_t factor_lj = special_lj[sbindex];
|
flt_t factor_lj = special_lj[sbindex];
|
||||||
const int jtype = jtype_form[jj];
|
const int jtype = IP_PRE_dword_index(jtype_form[jj]);
|
||||||
const flt_t sigma = ijci[jtype].sigma;
|
const flt_t sigma = ijci[jtype].sigma;
|
||||||
const flt_t epsilon = ijci[jtype].epsilon;
|
const flt_t epsilon = ijci[jtype].epsilon;
|
||||||
const flt_t shape2_0 = ic[jtype].shape2[0];
|
const flt_t shape2_0 = ic[jtype].shape2[0];
|
||||||
|
|||||||
@ -318,7 +318,7 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
|||||||
#ifdef INTEL_VMASK
|
#ifdef INTEL_VMASK
|
||||||
if (rsq < cut_ljsq) {
|
if (rsq < cut_ljsq) {
|
||||||
#endif
|
#endif
|
||||||
const int jtype = tjtype[jj];
|
const int jtype = IP_PRE_dword_index(tjtype[jj]);
|
||||||
flt_t r6inv = r2inv * r2inv * r2inv;
|
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||||
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
|
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
|
||||||
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
|
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
|
||||||
|
|||||||
@ -324,7 +324,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||||||
|
|
||||||
const int j = tj[jj] & NEIGHMASK;
|
const int j = tj[jj] & NEIGHMASK;
|
||||||
const int sbindex = tj[jj] >> SBBITS & 3;
|
const int sbindex = tj[jj] >> SBBITS & 3;
|
||||||
const int jtype = tjtype[jj];
|
const int jtype = IP_PRE_dword_index(tjtype[jj]);
|
||||||
const flt_t rsq = trsq[jj];
|
const flt_t rsq = trsq[jj];
|
||||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|
||||||
|
|||||||
@ -287,7 +287,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
const int jtype = x[j].w;
|
const int jtype = IP_PRE_dword_index(x[j].w);
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
if (rsq < c_forcei[jtype].cutsq) {
|
if (rsq < c_forcei[jtype].cutsq) {
|
||||||
@ -316,8 +316,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||||||
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
||||||
|
|
||||||
const int j = tj[jj] & NEIGHMASK;
|
const int j = tj[jj] & NEIGHMASK;
|
||||||
const int sbindex = tj[jj] >> SBBITS & 3;
|
const int sbindex = IP_PRE_dword_index(tj[jj] >> SBBITS & 3);
|
||||||
const int jtype = tjtype[jj];
|
const int jtype = IP_PRE_dword_index(tjtype[jj]);
|
||||||
const flt_t rsq = trsq[jj];
|
const flt_t rsq = trsq[jj];
|
||||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|
||||||
|
|||||||
@ -262,13 +262,13 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
|||||||
sbindex = jlist[jj] >> SBBITS & 3;
|
sbindex = jlist[jj] >> SBBITS & 3;
|
||||||
j = jlist[jj] & NEIGHMASK;
|
j = jlist[jj] & NEIGHMASK;
|
||||||
} else
|
} else
|
||||||
j = jlist[jj];
|
j = IP_PRE_dword_index(jlist[jj]);
|
||||||
|
|
||||||
const flt_t delx = xtmp - x[j].x;
|
const flt_t delx = xtmp - x[j].x;
|
||||||
const flt_t dely = ytmp - x[j].y;
|
const flt_t dely = ytmp - x[j].y;
|
||||||
const flt_t delz = ztmp - x[j].z;
|
const flt_t delz = ztmp - x[j].z;
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
jtype = x[j].w;
|
jtype = IP_PRE_dword_index(x[j].w);
|
||||||
cutsq = ljc12oi[jtype].cutsq;
|
cutsq = ljc12oi[jtype].cutsq;
|
||||||
}
|
}
|
||||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|||||||
@ -332,7 +332,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
|||||||
int jtype, ijtype;
|
int jtype, ijtype;
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
jtype = x[j].w;
|
jtype = x[j].w;
|
||||||
ijtype = itype_offset + jtype;
|
ijtype = IP_PRE_dword_index(itype_offset + jtype);
|
||||||
cutsq = p2[ijtype].cutsq;
|
cutsq = p2[ijtype].cutsq;
|
||||||
}
|
}
|
||||||
const flt_t rsq1 = delx * delx + dely * dely + delz * delz;
|
const flt_t rsq1 = delx * delx + dely * dely + delz * delz;
|
||||||
@ -378,7 +378,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
|||||||
if (EFLAG) fjtmp = (acc_t)0.0;
|
if (EFLAG) fjtmp = (acc_t)0.0;
|
||||||
int ijtype;
|
int ijtype;
|
||||||
|
|
||||||
if (!ONETYPE) ijtype = tjtype[jj] + itype_offset;
|
if (!ONETYPE) ijtype = IP_PRE_dword_index(tjtype[jj] + itype_offset);
|
||||||
const flt_t rsq1 = trsq[jj];
|
const flt_t rsq1 = trsq[jj];
|
||||||
|
|
||||||
const flt_t rinvsq1 = (flt_t)1.0 / rsq1;
|
const flt_t rinvsq1 = (flt_t)1.0 / rsq1;
|
||||||
@ -459,8 +459,8 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
|||||||
int iktype, ijktype;
|
int iktype, ijktype;
|
||||||
if (!ONETYPE) {
|
if (!ONETYPE) {
|
||||||
iktype = tjtype[kk];
|
iktype = tjtype[kk];
|
||||||
ijktype = ijkoff + iktype;
|
ijktype = IP_PRE_dword_index(ijkoff + iktype);
|
||||||
iktype += itype_offset;
|
iktype = IP_PRE_dword_index(iktype + itype_offset);
|
||||||
cut = p2[iktype].cut;
|
cut = p2[iktype].cut;
|
||||||
sigma_gamma = p2[iktype].sigma_gamma;
|
sigma_gamma = p2[iktype].sigma_gamma;
|
||||||
costheta = p3[ijktype].costheta;
|
costheta = p3[ijktype].costheta;
|
||||||
@ -520,7 +520,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // for kk
|
} // for kk
|
||||||
const int j = tj[jj];
|
const int j = IP_PRE_dword_index(tj[jj]);
|
||||||
f[j].x += fjxtmp;
|
f[j].x += fjxtmp;
|
||||||
f[j].y += fjytmp;
|
f[j].y += fjytmp;
|
||||||
f[j].z += fjztmp;
|
f[j].z += fjztmp;
|
||||||
|
|||||||
@ -403,7 +403,6 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
|
|||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
// current particle coord can be outside global and local box
|
// current particle coord can be outside global and local box
|
||||||
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
|
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
|
||||||
|
|
||||||
int nx = static_cast<int> ((x[i].x-lo0)*xi+fshift) - OFFSET;
|
int nx = static_cast<int> ((x[i].x-lo0)*xi+fshift) - OFFSET;
|
||||||
int ny = static_cast<int> ((x[i].y-lo1)*yi+fshift) - OFFSET;
|
int ny = static_cast<int> ((x[i].y-lo1)*yi+fshift) - OFFSET;
|
||||||
int nz = static_cast<int> ((x[i].z-lo2)*zi+fshift) - OFFSET;
|
int nz = static_cast<int> ((x[i].z-lo2)*zi+fshift) - OFFSET;
|
||||||
@ -941,6 +940,7 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
for (int i = ifrom; i < ito; i++) {
|
for (int i = ifrom; i < ito; i++) {
|
||||||
|
i = IP_PRE_dword_index(i);
|
||||||
particle_ekx[i] *= hx_inv;
|
particle_ekx[i] *= hx_inv;
|
||||||
particle_eky[i] *= hy_inv;
|
particle_eky[i] *= hy_inv;
|
||||||
particle_ekz[i] *= hz_inv;
|
particle_ekz[i] *= hz_inv;
|
||||||
|
|||||||
Reference in New Issue
Block a user