From 4b3197202ba2fc56f946376719bce24f1452897f Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Feb 2017 13:38:49 -0500 Subject: [PATCH] USER-DPD: Rework SSA to use a new neighbor list structure, ready for Kokkos NOTE: pair evaluation order changes, causing numerical differences! Atom pair processing order is fully planned out in npair_half_bin_newton_ssa Makes the SSA neighbor list structure very different. Do not use by others! Each local is in ilist, numneigh, and firstneigh four times instead of once. Changes LAMMPS core code that had been previously changed for USER-DPD/SSA: Removes ssaAIR[] from class Atom as it is now unused. Removes ndxAIR_ssa[] from class NeighList as it is now unused. Increases length of ilist[], numneigh[], and firstneigh[] if SSA flag set. --- src/USER-DPD/fix_shardlow.cpp | 39 ++-- src/USER-DPD/nbin_ssa.cpp | 20 -- src/USER-DPD/nbin_ssa.h | 3 - src/USER-DPD/npair_half_bin_newton_ssa.cpp | 204 ++++++++++++++------- src/USER-DPD/npair_half_bin_newton_ssa.h | 11 +- src/atom.cpp | 2 - src/atom.h | 1 - src/neigh_list.cpp | 22 +-- src/neigh_list.h | 1 - 9 files changed, 174 insertions(+), 129 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 4220760a9b..4a7fff66cf 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -55,6 +55,7 @@ #include "pair_dpd_fdt.h" #include "pair_dpd_fdt_energy.h" #include "pair.h" +#include "npair_half_bin_newton_ssa.h" #include "citeme.h" using namespace LAMMPS_NS; @@ -500,19 +501,30 @@ void FixShardlow::initial_integrate(int vflag) dtsqrt = sqrt(update->dt); - ii = 0; + NPairHalfBinNewtonSSA *np_ssa = dynamic_cast(list->np); + if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairHalfBinNewtonSSA object"); + int ssa_phaseCt = np_ssa->ssa_phaseCt; + int *ssa_phaseLen = np_ssa->ssa_phaseLen; + int **ssa_itemLoc = np_ssa->ssa_itemLoc; + int **ssa_itemLen = np_ssa->ssa_itemLen; + // process neighbors in the local AIR - while (ii < inum) { - i = ilist[ii]; - for (int subphase = 0; subphase < 4; subphase++) { - int start = (subphase > 0) ? list->ndxAIR_ssa[i][subphase - 1] : 0; - int len = list->ndxAIR_ssa[i][subphase] - start; - if (len > 0) { - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][start]), len); + for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { + int workItemCt = ssa_phaseLen[workPhase]; + + for (int workItem = 0; workItem < workItemCt; ++workItem) { + int ct = ssa_itemLen[workPhase][workItem]; + ii = ssa_itemLoc[workPhase][workItem]; + + while (ct-- > 0) { + int len = list->numneigh[ii]; + if (len > 0) { + if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len); + else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len); + } + ii++; } } - ii++; } ii = inum; @@ -531,10 +543,9 @@ void FixShardlow::initial_integrate(int vflag) // process neighbors in this AIR while (ct-- > 0) { - i = ilist[ii]; - int len = list->numneigh[i]; - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); + int len = list->numneigh[ii]; + if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len); + else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len); ii++; } diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 7e603af714..4c57a8e70f 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -30,9 +30,6 @@ using namespace LAMMPS_NS; NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) { - maxbin_ssa = 0; - binlist_ssa = NULL; - binct_ssa = NULL; for (int i = 0; i < 8; i++) { gairhead_ssa[i] = -1; } @@ -40,8 +37,6 @@ NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) NBinSSA::~NBinSSA() { - memory->destroy(binlist_ssa); - memory->destroy(binct_ssa); } /* ---------------------------------------------------------------------- @@ -72,8 +67,6 @@ void NBinSSA::bin_atoms() for (i = 0; i < mbins; i++) { binhead[i] = -1; - binlist_ssa[i] = -1; - binct_ssa[i] = 0; } // bin in reverse order so linked list will be in forward order @@ -108,7 +101,6 @@ void NBinSSA::bin_atoms() if (zbin >= lbinzhi) lbinzhi = zbin + 1; bins[i] = binhead[ibin]; binhead[ibin] = i; - ++(binct_ssa[ibin]); } } @@ -119,14 +111,6 @@ void NBinSSA::bin_atoms_setup(int nall) { NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too - if (mbins > maxbin_ssa) { - maxbin_ssa = mbins; - memory->destroy(binlist_ssa); - memory->destroy(binct_ssa); - memory->create(binlist_ssa,maxbin_ssa,"binlist_ssa"); - memory->create(binct_ssa,maxbin_ssa,"binct_ssa"); - } - // Clear the local bin extent bounding box. lbinxlo = mbinx - 1; // Safe to = stencil->sx + 1 lbinylo = mbiny - 1; // Safe to = stencil->sy + 1 @@ -142,10 +126,6 @@ bigint NBinSSA::memory_usage() { bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too - if (maxbin_ssa) { - bytes += memory->usage(binlist_ssa,maxbin_ssa); - bytes += memory->usage(binct_ssa,maxbin_ssa); - } return bytes; } diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index f26f8c77f0..2a0175081e 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -29,10 +29,7 @@ namespace LAMMPS_NS { class NBinSSA : public NBinStandard { public: - int *binlist_ssa; // index in neighlist of 1st local atom in each bin - int *binct_ssa; // count of local atoms in each bin int gairhead_ssa[8]; // index of 1st ghost atom in each AIR - int maxbin_ssa; // size of binlist_ssa and binct_ssa arrays // Bounds of the local atoms in the binhead array int lbinxlo; // lowest local bin x-dim coordinate diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 77b20966b0..2c787d6398 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -34,7 +34,27 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {} +NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) +{ + ssa_maxPhaseCt = 0; + ssa_maxPhaseLen = 0; + ssa_phaseCt = 0; + ssa_phaseLen = NULL; + ssa_itemLoc = NULL; + ssa_itemLen = NULL; +} + +/* ---------------------------------------------------------------------- */ + +NPairHalfBinNewtonSSA::~NPairHalfBinNewtonSSA() +{ + ssa_maxPhaseCt = 0; + ssa_maxPhaseLen = 0; + ssa_phaseCt = 0; + memory->destroy(ssa_phaseLen); + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); +} /* ---------------------------------------------------------------------- binned neighbor list construction with full Newton's 3rd law @@ -81,8 +101,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object"); int *bins = nb_ssa->bins; int *binhead = nb_ssa->binhead; - int *binlist_ssa = nb_ssa->binlist_ssa; - int *binct_ssa = nb_ssa->binct_ssa; int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]); int inum = 0; @@ -96,74 +114,81 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) int lbinzlo = nb_ssa->lbinzlo; int lbinzhi = nb_ssa->lbinzhi; + int sx1 = ns_ssa->sx + 1; + int sy1 = ns_ssa->sy + 1; + int sz1 = ns_ssa->sz + 1; + + ssa_phaseCt = sz1*sy1*sx1; + + xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1; + ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; + zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; + + int phaseLenEstimate = xbin*ybin*zbin; + + if (ssa_phaseCt > ssa_maxPhaseCt) { + ssa_maxPhaseCt = ssa_phaseCt; + ssa_maxPhaseLen = 0; + memory->destroy(ssa_phaseLen); + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); + memory->create(ssa_phaseLen,ssa_maxPhaseCt,"NPairHalfBinNewtonSSA:ssa_phaseLen"); + } + + if (phaseLenEstimate > ssa_maxPhaseLen) { + ssa_maxPhaseLen = phaseLenEstimate; + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); + memory->create(ssa_itemLoc,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLoc"); + memory->create(ssa_itemLen,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLen"); + } + ipage->reset(); + int workPhase = 0; // loop over bins with local atoms, storing half of the neighbors - for (zbin = lbinzlo; zbin < lbinzhi; zbin++) { - for (ybin = lbinylo; ybin < lbinyhi; ybin++) { - for (xbin = lbinxlo; xbin < lbinxhi; xbin++) { - ibin = zbin*mbiny*mbinx + ybin*mbinx + xbin; - binlist_ssa[ibin] = inum; // record where ibin starts in ilist - for (i = binhead[ibin]; i >= 0; i = bins[i]) { - n = 0; - neighptr = ipage->vget(); + for (int zoff = ns_ssa->sz; zoff >= 0; --zoff) { + for (int yoff = ns_ssa->sy; yoff >= 0; --yoff) { + for (int xoff = ns_ssa->sx; xoff >= 0; --xoff) { + int workItem = 0; + for (zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { + for (ybin = lbinylo + yoff - ns_ssa->sy; ybin < lbinyhi; ybin += sy1) { + for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sz1) { + if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); + ssa_itemLoc[workPhase][workItem] = inum; // record where workItem starts in ilist - itype = type[i]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - if (moltemplate) { - imol = molindex[i]; - iatom = molatom[i]; - tagprev = tag[i] - iatom - 1; - } - - // loop over rest of local atoms in i's bin - // just store them, since j is beyond i in linked list - - for (j = bins[i]; j >= 0; j = bins[j]) { - - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } - - // loop over all local atoms in other bins in "half" stencil - - k = 0; for (int subphase = 0; subphase < 4; subphase++) { - for (; k < nstencil_ssa[subphase]; k++) { - for (j = binhead[ibin+stencil[k]]; j >= 0; - j = bins[j]) { + int s_ybin = ybin + ((subphase & 0x2) ? ns_ssa->sy : 0); + int s_xbin = xbin + ((subphase & 0x1) ? ns_ssa->sx : 0); + int ibin, ct; + if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue; + if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; + ibin = zbin*nb_ssa->mbiny*nb_ssa->mbinx + + s_ybin*nb_ssa->mbinx + + s_xbin; + + for (i = binhead[ibin]; i >= 0; i = bins[i]) { + n = 0; + neighptr = ipage->vget(); + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + if (moltemplate) { + imol = molindex[i]; + iatom = molatom[i]; + tagprev = tag[i] - iatom - 1; + } + // loop over rest of local atoms in i's bin if this is subphase 0 + // just store them, since j is beyond i in linked list + if (subphase == 0) for (j = bins[i]; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= cutneighsq[itype][jtype]) { if (molecular) { if (!moltemplate) @@ -180,22 +205,59 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } else neighptr[n++] = j; } } - } - list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase - } - if (n > 0) { - ilist[inum++] = i; + // loop over all local atoms in other bins in "subphase" of stencil + k = (subphase > 0) ? nstencil_ssa[subphase - 1] : 0; + for (; k < nstencil_ssa[subphase]; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + if (n > 0) { + firstneigh[inum] = neighptr; + numneigh[inum] = n; + ilist[inum++] = i; + } + ipage->vgot(n); + if (ipage->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } } - firstneigh[i] = neighptr; - numneigh[i] = n; - ipage->vgot(n); - if (ipage->status()) - error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + // record where workItem ends in ilist + ssa_itemLen[workPhase][workItem] = inum - ssa_itemLoc[workPhase][workItem]; + if (ssa_itemLen[workPhase][workItem] > 0) workItem++; } } } + + // record where workPhase ends + ssa_phaseLen[workPhase++] = workItem; } + } + } + + if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); list->AIRct_ssa[0] = list->inum = inum; @@ -258,11 +320,11 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } if (n > 0) { + firstneigh[inum + gnum] = neighptr; + numneigh[inum + gnum] = n; ilist[inum + (gnum++)] = i; ++locAIRct; } - firstneigh[i] = neighptr; - numneigh[i] = n; ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one"); diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.h b/src/USER-DPD/npair_half_bin_newton_ssa.h index c9ccbc4bd9..ea292316ca 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.h +++ b/src/USER-DPD/npair_half_bin_newton_ssa.h @@ -28,9 +28,18 @@ namespace LAMMPS_NS { class NPairHalfBinNewtonSSA : public NPair { public: + // SSA Work plan data structures + int ssa_phaseCt; + int *ssa_phaseLen; + int **ssa_itemLoc; + int **ssa_itemLen; + NPairHalfBinNewtonSSA(class LAMMPS *); - ~NPairHalfBinNewtonSSA() {} + ~NPairHalfBinNewtonSSA(); void build(class NeighList *); + private: + int ssa_maxPhaseCt; + int ssa_maxPhaseLen; }; } diff --git a/src/atom.cpp b/src/atom.cpp index 0920dc3a02..de98b65470 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -99,7 +99,6 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp) uCond = uMech = uChem = uCG = uCGnew = NULL; duChem = NULL; dpdTheta = NULL; - ssaAIR = NULL; // USER-SMD @@ -296,7 +295,6 @@ Atom::~Atom() memory->destroy(uCG); memory->destroy(uCGnew); memory->destroy(duChem); - memory->destroy(ssaAIR); memory->destroy(nspecial); memory->destroy(special); diff --git a/src/atom.h b/src/atom.h index de7cda06ac..745377cee1 100644 --- a/src/atom.h +++ b/src/atom.h @@ -93,7 +93,6 @@ class Atom : protected Pointers { double *duChem; double *dpdTheta; int nspecies_dpd; - int *ssaAIR; // Shardlow Splitting Algorithm Active Interaction Region number // molecular info diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index e8fd4130fc..6376637832 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -78,7 +78,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) // USER-DPD package - ndxAIR_ssa = NULL; + for (int i = 0; i < 8; i++) AIRct_ssa[i] = 0; np = NULL; } @@ -98,10 +98,6 @@ NeighList::~NeighList() delete [] iskip; memory->destroy(ijskip); - - if (ssa) { - memory->sfree(ndxAIR_ssa); - } } /* ---------------------------------------------------------------------- @@ -202,14 +198,16 @@ void NeighList::grow(int nlocal, int nall) if (listmiddle) listmiddle->grow(nlocal,nall); // skip if data structs are already big enough - - if (ghost) { + if (ssa) { + if ((nlocal * 3) + nall <= maxatom) return; + } else if (ghost) { if (nall <= maxatom) return; } else { if (nlocal <= maxatom) return; } - maxatom = atom->nmax; + if (ssa) maxatom = (nlocal * 3) + nall; + else maxatom = atom->nmax; memory->destroy(ilist); memory->destroy(numneigh); @@ -223,12 +221,6 @@ void NeighList::grow(int nlocal, int nall) firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *), "neighlist:firstdouble"); } - - if (ssa) { - if (ndxAIR_ssa) memory->sfree(ndxAIR_ssa); - ndxAIR_ssa = (uint16_t (*)[8]) memory->smalloc(sizeof(uint16_t)*8*maxatom, - "neighlist:ndxAIR_ssa"); - } } /* ---------------------------------------------------------------------- @@ -305,7 +297,5 @@ bigint NeighList::memory_usage() } } - if (ndxAIR_ssa) bytes += sizeof(uint16_t) * 8 * maxatom; - return bytes; } diff --git a/src/neigh_list.h b/src/neigh_list.h index ea88e9b28b..bef512512c 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -81,7 +81,6 @@ class NeighList : protected Pointers { // USER-DPD package and Shardlow Splitting Algorithm (SSA) support int AIRct_ssa[8]; // count of how many atoms in each AIR - uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR class NPair *np; // ptr to NPair instance I depend on // methods