USER-DPD: Split the SSA stencil and neighbor list into subphases.
NOTE: pair evaluation order changes, causing numerical differences! This change enables processing neighbors in subphase groups that enforce a geometrical separation of pairs, allowing greater parallelism once fix_shardlow (SSA) is converted to Kokkos.
This commit is contained in:
@ -74,7 +74,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
||||
|
||||
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
|
||||
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
|
||||
int nstencil_half = ns_ssa->nstencil_half;
|
||||
int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]);
|
||||
int nstencil_full = ns_ssa->nstencil;
|
||||
|
||||
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
|
||||
@ -150,34 +150,38 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
||||
|
||||
// loop over all local atoms in other bins in "half" stencil
|
||||
|
||||
for (k = 0; k < nstencil_half; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0;
|
||||
j = bins[j]) {
|
||||
k = 0;
|
||||
for (int subphase = 0; subphase < 4; subphase++) {
|
||||
for (; k < nstencil_ssa[subphase]; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0;
|
||||
j = bins[j]) {
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >= 0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >= 0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
|
||||
@ -42,31 +42,69 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
|
||||
void NStencilHalfBin2dNewtonSSA::create()
|
||||
{
|
||||
int i,j,pos = 0;
|
||||
|
||||
// Subphase 0: upper right front bins (red)
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (j > 0 || (j == 0 && i > 0))
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (j > 0 || i > 0) // skip the centroid
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[0] = pos;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
// Subphase 1: upper left front bins (light blue)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
}
|
||||
nstencil_ssa[1] = pos;
|
||||
|
||||
// Subphase 2: lower left front bins (blue)
|
||||
nstencil_ssa[2] = pos;
|
||||
|
||||
// Subphase 3: lower right front bins (yellow)
|
||||
nstencil_ssa[3] = pos;
|
||||
|
||||
// Now include additional bins for AIR ghosts, and impure-to-pure locals
|
||||
// Subphase 4: upper right back bins (pink)
|
||||
nstencil_ssa[4] = pos;
|
||||
|
||||
// Subphase 5: upper left back bins (light green)
|
||||
nstencil_ssa[5] = pos;
|
||||
|
||||
// Subphase 6: lower left back bins (purple)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[6] = pos;
|
||||
|
||||
// Subphase 7: lower right back bins (white)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[7] = pos;
|
||||
|
||||
// Also, include the centroid for the AIR ghosts.
|
||||
stencilxyz[pos][0] = 0;
|
||||
stencilxyz[pos][1] = 0;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = 0;
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
|
||||
@ -42,45 +42,109 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
|
||||
void NStencilHalfBin3dNewtonSSA::create()
|
||||
{
|
||||
int i,j,k,pos = 0;
|
||||
|
||||
// Subphase 0: upper right front bins (red)
|
||||
for (k = 0; k <= sz; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (k > 0 || j > 0 || (j == 0 && i > 0))
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (k > 0 || j > 0 || i > 0) // skip the centroid
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[0] = pos;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
// Subphase 1: upper left front bins (light blue)
|
||||
for (k = 0; k <= sz; k++)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[1] = pos;
|
||||
|
||||
// For k==0, make sure to skip already included bins
|
||||
// Subphase 2: lower left front bins (blue)
|
||||
for (k = 1; k <= sz; k++)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[2] = pos;
|
||||
|
||||
k = 0;
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
}
|
||||
// Subphase 3: lower right front bins (yellow)
|
||||
for (k = 1; k <= sz; k++)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[3] = pos;
|
||||
|
||||
// Now include additional bins for AIR ghosts, and impure-to-pure locals
|
||||
// Subphase 4: upper right back bins (pink)
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[4] = pos;
|
||||
|
||||
// Subphase 5: upper left back bins (light green)
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[5] = pos;
|
||||
|
||||
// Subphase 6: lower left back bins (purple)
|
||||
for (k = -sz; k <= 0; k++)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[6] = pos;
|
||||
|
||||
// Subphase 7: lower right back bins (white)
|
||||
for (k = -sz; k <= 0; k++)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
nstencil_ssa[7] = pos;
|
||||
|
||||
// Also, include the centroid for the AIR ghosts.
|
||||
stencilxyz[pos][0] = 0;
|
||||
stencilxyz[pos][1] = 0;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = 0;
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ class NStencilSSA : public NStencil {
|
||||
~NStencilSSA() {}
|
||||
virtual void create() = 0;
|
||||
|
||||
int nstencil_half; // where the half stencil ends
|
||||
int nstencil_ssa[8]; // last stencil index for each subphase
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user