USER-DPD: Split the SSA stencil and neighbor list into subphases.

NOTE: pair evaluation order changes, causing numerical differences!
This enables processing neighbors in subphase groups that enforce
a geometrical separation of pairs, allowing greater parallelism
once fix_shardlow (SSA) is converted to Kokkos.
This commit is contained in:
Tim Mattox
2017-01-30 14:32:18 -05:00
parent ce2da5068b
commit ee83b755ea
4 changed files with 165 additions and 59 deletions

View File

@ -74,7 +74,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
int nstencil_half = ns_ssa->nstencil_half;
int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]);
int nstencil_full = ns_ssa->nstencil;
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
@ -150,34 +150,38 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
// loop over all local atoms in other bins in "half" stencil
for (k = 0; k < nstencil_half; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0;
j = bins[j]) {
k = 0;
for (int subphase = 0; subphase < 4; subphase++) {
for (; k < nstencil_ssa[subphase]; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0;
j = bins[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase
}
if (n > 0) {

View File

@ -42,31 +42,69 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
void NStencilHalfBin2dNewtonSSA::create()
{
int i,j,pos = 0;
// Subphase 0: upper right front bins (red)
for (j = 0; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (j > 0 || (j == 0 && i > 0))
for (i = 0; i <= sx; i++)
if (j > 0 || i > 0) // skip the centroid
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
nstencil_ssa[0] = pos;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
// Subphase 1: upper left front bins (light blue)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
}
nstencil_ssa[1] = pos;
// Subphase 2: lower left front bins (blue)
nstencil_ssa[2] = pos;
// Subphase 3: lower right front bins (yellow)
nstencil_ssa[3] = pos;
// Now include additional bins for AIR ghosts, and impure-to-pure locals
// Subphase 4: upper right back bins (pink)
nstencil_ssa[4] = pos;
// Subphase 5: upper left back bins (light green)
nstencil_ssa[5] = pos;
// Subphase 6: lower left back bins (purple)
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
nstencil_ssa[6] = pos;
// Subphase 7: lower right back bins (white)
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
nstencil_ssa[7] = pos;
// Also, include the centroid for the AIR ghosts.
stencilxyz[pos][0] = 0;
stencilxyz[pos][1] = 0;
stencilxyz[pos][2] = 0;
stencil[pos++] = 0;
nstencil = pos; // record where full stencil ends
}

View File

@ -42,45 +42,109 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
void NStencilHalfBin3dNewtonSSA::create()
{
int i,j,k,pos = 0;
// Subphase 0: upper right front bins (red)
for (k = 0; k <= sz; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (k > 0 || j > 0 || (j == 0 && i > 0))
for (j = 0; j <= sy; j++)
for (i = 0; i <= sx; i++)
if (k > 0 || j > 0 || i > 0) // skip the centroid
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[0] = pos;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (k = -sz; k < 0; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
// Subphase 1: upper left front bins (light blue)
for (k = 0; k <= sz; k++)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[1] = pos;
// For k==0, make sure to skip already included bins
// Subphase 2: lower left front bins (blue)
for (k = 1; k <= sz; k++)
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[2] = pos;
k = 0;
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
}
// Subphase 3: lower right front bins (yellow)
for (k = 1; k <= sz; k++)
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[3] = pos;
// Now include additional bins for AIR ghosts, and impure-to-pure locals
// Subphase 4: upper right back bins (pink)
for (k = -sz; k < 0; k++)
for (j = 0; j <= sy; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[4] = pos;
// Subphase 5: upper left back bins (light green)
for (k = -sz; k < 0; k++)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[5] = pos;
// Subphase 6: lower left back bins (purple)
for (k = -sz; k <= 0; k++)
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[6] = pos;
// Subphase 7: lower right back bins (white)
for (k = -sz; k <= 0; k++)
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[7] = pos;
// Also, include the centroid for the AIR ghosts.
stencilxyz[pos][0] = 0;
stencilxyz[pos][1] = 0;
stencilxyz[pos][2] = 0;
stencil[pos++] = 0;
nstencil = pos; // record where full stencil ends
}

View File

@ -24,7 +24,7 @@ class NStencilSSA : public NStencil {
~NStencilSSA() {}
virtual void create() = 0;
int nstencil_half; // where the half stencil ends
int nstencil_ssa[8]; // last stencil index for each subphase
};
}