From d7a2949d1ab0237c20b1e0cf21d9dea2f0bf8b65 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 25 Mar 2019 21:30:48 -0400 Subject: [PATCH] Revert "Rendezvous" --- src/RIGID/fix_rigid_small.cpp | 512 ++++++------ src/RIGID/fix_rigid_small.h | 26 +- src/RIGID/fix_shake.cpp | 804 ++++++------------- src/RIGID/fix_shake.h | 43 +- src/comm.cpp | 428 ---------- src/comm.h | 13 - src/create_atoms.cpp | 37 +- src/hashlittle.cpp | 349 -------- src/hashlittle.h | 5 - src/irregular.cpp | 203 +---- src/irregular.h | 2 - src/read_data.cpp | 15 - src/read_restart.cpp | 15 - src/replicate.cpp | 8 +- src/special.cpp | 1416 ++++++++++++++------------------- src/special.h | 38 +- 16 files changed, 1207 insertions(+), 2707 deletions(-) delete mode 100644 src/hashlittle.cpp delete mode 100644 src/hashlittle.h diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index d74dd0fc3b..fb185d7702 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -28,14 +28,12 @@ #include "modify.h" #include "group.h" #include "comm.h" -#include "neighbor.h" #include "force.h" #include "input.h" #include "output.h" #include "variable.h" #include "random_mars.h" #include "math_const.h" -#include "hashlittle.h" #include "memory.h" #include "error.h" @@ -45,8 +43,6 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -#define RVOUS 1 // 0 for irregular, 1 for all2all - #define MAXLINE 1024 #define CHUNK 1024 #define ATTRIBUTE_PERBODY 20 @@ -74,7 +70,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), - id_dilate(NULL), onemols(NULL) + id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), + idclose(NULL), rsqclose(NULL) { int i; @@ -110,18 +107,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // parse args for rigid body specification int *mask = atom->mask; - tagint *bodyID = NULL; + tagint *bodyid = NULL; int nlocal = atom->nlocal; if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command"); if (strcmp(arg[3],"molecule") == 0) { if (atom->molecule_flag == 0) error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); - bodyID = atom->molecule; + bodyid = atom->molecule; } else if (strcmp(arg[3],"custom") == 0) { if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command"); - bodyID = new tagint[nlocal]; + bodyid = new tagint[nlocal]; customflag = 1; // determine whether atom-style variable or atom property is used. @@ -129,11 +126,9 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : int is_double=0; int custom_index = atom->find_custom(arg[4]+2,is_double); if (custom_index == -1) - error->all(FLERR,"Fix rigid/small custom requires " - "previously defined property/atom"); + error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom"); else if (is_double) - error->all(FLERR,"Fix rigid/small custom requires " - "integer-valued property/atom"); + error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom"); int minval = INT_MAX; int *value = atom->ivector[custom_index]; @@ -144,17 +139,15 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyID[i] = (tagint)(value[i] - minval + 1); - else bodyID[i] = 0; + bodyid[i] = (tagint)(value[i] - minval + 1); + else bodyid[i] = 0; } else if (strstr(arg[4],"v_") == arg[4]) { int ivariable = input->variable->find(arg[4]+2); if (ivariable < 0) - error->all(FLERR,"Variable name for fix rigid/small custom " - "does not exist"); + error->all(FLERR,"Variable name for fix rigid/small custom does not exist"); if (input->variable->atomstyle(ivariable) == 0) - error->all(FLERR,"Fix rigid/small custom variable is not " - "atom-style variable"); + error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable"); double *value = new double[nlocal]; input->variable->compute_atom(ivariable,0,value,1,0); int minval = INT_MAX; @@ -165,8 +158,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyID[i] = (tagint)((tagint)value[i] - minval + 1); - else bodyID[0] = 0; + bodyid[i] = (tagint)((tagint)value[i] - minval + 1); + else bodyid[0] = 0; delete[] value; } else error->all(FLERR,"Unsupported fix rigid custom property"); } else error->all(FLERR,"Illegal fix rigid/small command"); @@ -174,11 +167,10 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (atom->map_style == 0) error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify"); - // maxmol = largest bodyID # - + // maxmol = largest bodyid # maxmol = -1; for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]); + if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]); tagint itmp; MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world); @@ -408,19 +400,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // sets bodytag for owned atoms // body attributes are computed later by setup_bodies() - double time1 = MPI_Wtime(); - - create_bodies(bodyID); - if (customflag) delete [] bodyID; - - double time2 = MPI_Wtime(); - - if (comm->me == 0) { - if (screen) - fprintf(screen," create bodies CPU = %g secs\n",time2-time1); - if (logfile) - fprintf(logfile," create bodies CPU = %g secs\n",time2-time1); - } + create_bodies(bodyid); + if (customflag) delete [] bodyid; // set nlocal_body and allocate bodies I own @@ -588,22 +569,12 @@ void FixRigidSmall::init() if (rflag && (modify->fmask[i] & POST_FORCE) && !modify->fix[i]->rigid_flag) { char str[128]; - snprintf(str,128,"Fix %s alters forces after fix rigid", - modify->fix[i]->id); + snprintf(str,128,"Fix %s alters forces after fix rigid",modify->fix[i]->id); error->warning(FLERR,str); } } } - // error if maxextent > comm->cutghost - // NOTE: could just warn if an override flag set - // NOTE: this could fail for comm multi mode if user sets a wrong cutoff - // for atom types in rigid bodies - need a more careful test - - double cutghost = MAX(neighbor->cutneighmax,comm->cutghostuser); - if (maxextent > cutghost) - error->all(FLERR,"Rigid body extent > ghost cutoff - use comm_modify cutoff"); - // error if npt,nph fix comes before rigid fix for (i = 0; i < modify->nfix; i++) { @@ -1543,72 +1514,175 @@ void FixRigidSmall::set_v() set bodytag for all owned atoms ------------------------------------------------------------------------- */ -void FixRigidSmall::create_bodies(tagint *bodyID) +void FixRigidSmall::create_bodies(tagint *bodyid) { - int i,m; + int i,m,n; + double unwrap[3]; - // allocate buffer for input to rendezvous comm - // ncount = # of my atoms in bodies - + // error check on image flags of atoms in rigid bodies + + imageint *image = atom->image; int *mask = atom->mask; int nlocal = atom->nlocal; + int *periodicity = domain->periodicity; + int xbox,ybox,zbox; + + int flag = 0; + for (i = 0; i < nlocal; i++) { + if (!(mask[i] & groupbit)) continue; + xbox = (image[i] & IMGMASK) - IMGMAX; + ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; + zbox = (image[i] >> IMG2BITS) - IMGMAX; + if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) || + (zbox && !periodicity[2])) flag = 1; + } + + int flagall; + MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); + if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag " + "in a non-periodic dimension"); + + // allocate buffer for passing messages around ring of procs + // percount = max number of values to put in buffer for each of ncount + int ncount = 0; for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) ncount++; - int *proclist; - memory->create(proclist,ncount,"rigid/small:proclist"); - InRvous *inbuf = (InRvous *) - memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf"); + int percount = 5; + double *buf; + memory->create(buf,ncount*percount,"rigid/small:buf"); - // setup buf to pass to rendezvous comm - // one BodyMsg datum for each constituent atom - // datum = me, local index of atom, atomID, bodyID, unwrapped coords - // owning proc for each datum = random hash of bodyID + // create map hash for storing unique body IDs of my atoms + // key = body ID + // value = index into per-body data structure + // n = # of entries in hash + + hash = new std::map(); + hash->clear(); + + // setup hash + // key = body ID + // value = index into N-length data structure + // n = count of unique bodies my atoms are part of + + n = 0; + for (i = 0; i < nlocal; i++) { + if (!(mask[i] & groupbit)) continue; + if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++; + } + + // bbox = bounding box of each rigid body my atoms are part of + + memory->create(bbox,n,6,"rigid/small:bbox"); + + for (i = 0; i < n; i++) { + bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG; + bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG; + } + + // pack my atoms into buffer as body ID, unwrapped coords double **x = atom->x; - tagint *tag = atom->tag; - imageint *image = atom->image; m = 0; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; - proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs; - inbuf[m].me = me; - inbuf[m].ilocal = i; - inbuf[m].atomID = tag[i]; - inbuf[m].bodyID = bodyID[i]; - domain->unmap(x[i],image[i],inbuf[m].x); - m++; + domain->unmap(x[i],image[i],unwrap); + buf[m++] = bodyid[i]; + buf[m++] = unwrap[0]; + buf[m++] = unwrap[1]; + buf[m++] = unwrap[2]; } - // perform rendezvous operation - // each proc owns random subset of bodies - // receives all atoms in those bodies - // func = compute bbox of each body, find atom closest to geometric center + // pass buffer around ring of procs + // func = update bbox with atom coords from every proc + // when done, have full bbox for every rigid body my atoms are part of - char *buf; - int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous), - 0,proclist, - rendezvous_body,0,buf,sizeof(OutRvous), - (void *) this); - OutRvous *outbuf = (OutRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); + comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this); - // set bodytag of all owned atoms based on outbuf info for constituent atoms + // check if any bbox is size 0.0, meaning rigid body is a single particle - for (i = 0; i < nlocal; i++) - if (!(mask[i] & groupbit)) bodytag[i] = 0; + flag = 0; + for (i = 0; i < n; i++) + if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] && + bbox[i][4] == bbox[i][5]) flag = 1; + MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); + if (flagall) + error->all(FLERR,"One or more rigid bodies are a single particle"); - for (m = 0; m < nreturn; m++) - bodytag[outbuf[m].ilocal] = outbuf[m].atomID; + // ctr = center pt of each rigid body my atoms are part of - memory->sfree(outbuf); + memory->create(ctr,n,6,"rigid/small:bbox"); - // maxextent = max of rsqfar across all procs + for (i = 0; i < n; i++) { + ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]); + ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]); + ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]); + } + + // idclose = ID of atom in body closest to center pt (smaller ID if tied) + // rsqclose = distance squared from idclose to center pt + + memory->create(idclose,n,"rigid/small:idclose"); + memory->create(rsqclose,n,"rigid/small:rsqclose"); + + for (i = 0; i < n; i++) rsqclose[i] = BIG; + + // pack my atoms into buffer as body ID, atom ID, unwrapped coords + + tagint *tag = atom->tag; + + m = 0; + for (i = 0; i < nlocal; i++) { + if (!(mask[i] & groupbit)) continue; + domain->unmap(x[i],image[i],unwrap); + buf[m++] = bodyid[i]; + buf[m++] = ubuf(tag[i]).d; + buf[m++] = unwrap[0]; + buf[m++] = unwrap[1]; + buf[m++] = unwrap[2]; + } + + // pass buffer around ring of procs + // func = update idclose,rsqclose with atom IDs from every proc + // when done, have idclose for every rigid body my atoms are part of + + comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this); + + // set bodytag of all owned atoms, based on idclose + // find max value of rsqclose across all procs + + double rsqmax = 0.0; + for (i = 0; i < nlocal; i++) { + bodytag[i] = 0; + if (!(mask[i] & groupbit)) continue; + m = hash->find(bodyid[i])->second; + bodytag[i] = idclose[m]; + rsqmax = MAX(rsqmax,rsqclose[m]); + } + + // pack my atoms into buffer as bodytag of owning atom, unwrapped coords + + m = 0; + for (i = 0; i < nlocal; i++) { + if (!(mask[i] & groupbit)) continue; + domain->unmap(x[i],image[i],unwrap); + buf[m++] = ubuf(bodytag[i]).d; + buf[m++] = unwrap[0]; + buf[m++] = unwrap[1]; + buf[m++] = unwrap[2]; + } + + // pass buffer around ring of procs + // func = update rsqfar for atoms belonging to bodies I own + // when done, have rsqfar for all atoms in bodies I own + + rsqfar = 0.0; + comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this); + + // find maxextent of rsqfar across all procs // if defined, include molecule->maxextent MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world); @@ -1617,156 +1691,125 @@ void FixRigidSmall::create_bodies(tagint *bodyID) for (int i = 0; i < nmol; i++) maxextent = MAX(maxextent,onemols[i]->maxextent); } -} - -/* ---------------------------------------------------------------------- - process rigid bodies assigned to me - buf = list of N BodyMsg datums -------------------------------------------------------------------------- */ - -int FixRigidSmall::rendezvous_body(int n, char *inbuf, - int &rflag, int *&proclist, char *&outbuf, - void *ptr) -{ - int i,j,m; - double delx,dely,delz,rsq; - int *iclose; - tagint *idclose; - double *x,*xown,*rsqclose; - double **bbox,**ctr; - - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - Memory *memory = frsptr->memory; - Error *error = frsptr->error; - MPI_Comm world = frsptr->world; - - // setup hash - // use STL map instead of atom->map - // b/c know nothing about body ID values specified by user - // ncount = number of bodies assigned to me - // key = body ID - // value = index into Ncount-length data structure - - InRvous *in = (InRvous *) inbuf; - std::map hash; - tagint id; - - int ncount = 0; - for (i = 0; i < n; i++) { - id = in[i].bodyID; - if (hash.find(id) == hash.end()) hash[id] = ncount++; - } - - // bbox = bounding box of each rigid body - - memory->create(bbox,ncount,6,"rigid/small:bbox"); - - for (m = 0; m < ncount; m++) { - bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG; - bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG; - } - - for (i = 0; i < n; i++) { - m = hash.find(in[i].bodyID)->second; - x = in[i].x; - bbox[m][0] = MIN(bbox[m][0],x[0]); - bbox[m][1] = MAX(bbox[m][1],x[0]); - bbox[m][2] = MIN(bbox[m][2],x[1]); - bbox[m][3] = MAX(bbox[m][3],x[1]); - bbox[m][4] = MIN(bbox[m][4],x[2]); - bbox[m][5] = MAX(bbox[m][5],x[2]); - } - - // check if any bbox is size 0.0, meaning rigid body is a single particle - - int flag = 0; - for (m = 0; m < ncount; m++) - if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] && - bbox[m][4] == bbox[m][5]) flag = 1; - int flagall; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here? - if (flagall) - error->all(FLERR,"One or more rigid bodies are a single particle"); - - // ctr = geometric center pt of each rigid body - - memory->create(ctr,ncount,3,"rigid/small:bbox"); - - for (m = 0; m < ncount; m++) { - ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]); - ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]); - ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]); - } - - // idclose = atomID closest to center point of each body - - memory->create(idclose,ncount,"rigid/small:idclose"); - memory->create(iclose,ncount,"rigid/small:iclose"); - memory->create(rsqclose,ncount,"rigid/small:rsqclose"); - for (m = 0; m < ncount; m++) rsqclose[m] = BIG; - - for (i = 0; i < n; i++) { - m = hash.find(in[i].bodyID)->second; - x = in[i].x; - delx = x[0] - ctr[m][0]; - dely = x[1] - ctr[m][1]; - delz = x[2] - ctr[m][2]; - rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= rsqclose[m]) { - if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue; - iclose[m] = i; - idclose[m] = in[i].atomID; - rsqclose[m] = rsq; - } - } - - // compute rsqfar for all bodies I own - // set rsqfar back in caller - - double rsqfar = 0.0; - - for (int i = 0; i < n; i++) { - m = hash.find(in[i].bodyID)->second; - xown = in[iclose[m]].x; - x = in[i].x; - delx = x[0] - xown[0]; - dely = x[1] - xown[1]; - delz = x[2] - xown[2]; - rsq = delx*delx + dely*dely + delz*delz; - rsqfar = MAX(rsqfar,rsq); - } - - frsptr->rsqfar = rsqfar; - - // pass list of OutRvous datums back to comm->rendezvous - - int nout = n; - memory->create(proclist,nout,"rigid/small:proclist"); - OutRvous *out = (OutRvous *) - memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out"); - - for (int i = 0; i < nout; i++) { - proclist[i] = in[i].me; - out[i].ilocal = in[i].ilocal; - m = hash.find(in[i].bodyID)->second; - out[i].atomID = idclose[m]; - } - - outbuf = (char *) out; // clean up - // Comm::rendezvous will delete proclist and out (outbuf) + delete hash; + memory->destroy(buf); memory->destroy(bbox); memory->destroy(ctr); memory->destroy(idclose); - memory->destroy(iclose); memory->destroy(rsqclose); +} - // flag = 2: new outbuf +/* ---------------------------------------------------------------------- + process rigid body atoms from another proc + update bounding box for rigid bodies my atoms are part of +------------------------------------------------------------------------- */ - rflag = 2; - return nout; +void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr) +{ + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + std::map *hash = frsptr->hash; + double **bbox = frsptr->bbox; + + double *buf = (double *) cbuf; + int ndatums = n/4; + + int j,imol; + double *x; + + int m = 0; + for (int i = 0; i < ndatums; i++, m += 4) { + imol = static_cast (buf[m]); + if (hash->find(imol) != hash->end()) { + j = hash->find(imol)->second; + x = &buf[m+1]; + bbox[j][0] = MIN(bbox[j][0],x[0]); + bbox[j][1] = MAX(bbox[j][1],x[0]); + bbox[j][2] = MIN(bbox[j][2],x[1]); + bbox[j][3] = MAX(bbox[j][3],x[1]); + bbox[j][4] = MIN(bbox[j][4],x[2]); + bbox[j][5] = MAX(bbox[j][5],x[2]); + } + } +} + +/* ---------------------------------------------------------------------- + process rigid body atoms from another proc + update nearest atom to body center for rigid bodies my atoms are part of +------------------------------------------------------------------------- */ + +void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr) +{ + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + std::map *hash = frsptr->hash; + double **ctr = frsptr->ctr; + tagint *idclose = frsptr->idclose; + double *rsqclose = frsptr->rsqclose; + + double *buf = (double *) cbuf; + int ndatums = n/5; + + int j,imol; + tagint tag; + double delx,dely,delz,rsq; + double *x; + + int m = 0; + for (int i = 0; i < ndatums; i++, m += 5) { + imol = static_cast (buf[m]); + if (hash->find(imol) != hash->end()) { + j = hash->find(imol)->second; + tag = (tagint) ubuf(buf[m+1]).i; + x = &buf[m+2]; + delx = x[0] - ctr[j][0]; + dely = x[1] - ctr[j][1]; + delz = x[2] - ctr[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= rsqclose[j]) { + if (rsq == rsqclose[j] && tag > idclose[j]) continue; + idclose[j] = tag; + rsqclose[j] = rsq; + } + } + } +} + +/* ---------------------------------------------------------------------- + process rigid body atoms from another proc + update rsqfar = distance from owning atom to other atom +------------------------------------------------------------------------- */ + +void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr) +{ + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + double **x = frsptr->atom->x; + imageint *image = frsptr->atom->image; + int nlocal = frsptr->atom->nlocal; + + double *buf = (double *) cbuf; + int ndatums = n/4; + + int iowner; + tagint tag; + double delx,dely,delz,rsq; + double *xx; + double unwrap[3]; + + int m = 0; + for (int i = 0; i < ndatums; i++, m += 4) { + tag = (tagint) ubuf(buf[m]).i; + iowner = frsptr->atom->map(tag); + if (iowner < 0 || iowner >= nlocal) continue; + frsptr->domain->unmap(x[iowner],image[iowner],unwrap); + xx = &buf[m+1]; + delx = xx[0] - unwrap[0]; + dely = xx[1] - unwrap[1]; + delz = xx[2] - unwrap[2]; + rsq = delx*delx + dely*dely + delz*delz; + frsptr->rsqfar = MAX(frsptr->rsqfar,rsq); + } } /* ---------------------------------------------------------------------- @@ -2429,9 +2472,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) int nlocal = atom->nlocal; - std::map hash; + hash = new std::map(); for (i = 0; i < nlocal; i++) - if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i]; + if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i]; // open file and read header @@ -2490,11 +2533,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) id = ATOTAGINT(values[0]); if (id <= 0 || id > maxmol) error->all(FLERR,"Invalid rigid body ID in fix rigid/small file"); - if (hash.find(id) == hash.end()) { + if (hash->find(id) == hash->end()) { buf = next + 1; continue; } - m = hash[id]; + m = (*hash)[id]; inbody[m] = 1; if (which == 0) { @@ -2533,6 +2576,7 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) delete [] buffer; delete [] values; + delete hash; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index f6ad1b7206..3f6826f9bb 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall) #include "fix.h" // replace this later -//#include +#include namespace LAMMPS_NS { @@ -180,21 +180,13 @@ class FixRigidSmall : public Fix { // class data used by ring communication callbacks + std::map *hash; + double **bbox; + double **ctr; + tagint *idclose; + double *rsqclose; double rsqfar; - struct InRvous { - int me,ilocal; - tagint atomID,bodyID; - double x[3]; - }; - - struct OutRvous { - int ilocal; - tagint atomID; - }; - - // local methods - void image_shift(); void set_xv(); void set_v(); @@ -207,9 +199,11 @@ class FixRigidSmall : public Fix { void grow_body(); void reset_atom2body(); - // callback function for rendezvous communication + // callback functions for ring communication - static int rendezvous_body(int, char *, int &, int *&, char *&, void *); + static void ring_bbox(int, char *, void *); + static void ring_nearest(int, char *, void *); + static void ring_farthest(int, char *, void *); // debug diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index dcdb190d45..e0d1bf132b 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -39,8 +39,6 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -#define RVOUS 1 // 0 for irregular, 1 for all2all - #define BIG 1.0e20 #define MASSDELTA 0.1 @@ -221,19 +219,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : // identify all SHAKE clusters - double time1 = MPI_Wtime(); - find_clusters(); - double time2 = MPI_Wtime(); - - if (comm->me == 0) { - if (screen) - fprintf(screen," find clusters CPU = %g secs\n",time2-time1); - if (logfile) - fprintf(logfile," find clusters CPU = %g secs\n",time2-time1); - } - // initialize list of SHAKE clusters to constrain maxlist = 0; @@ -720,6 +707,13 @@ void FixShake::find_clusters() int nlocal = atom->nlocal; int angles_allow = atom->avec->angles_allow; + // setup ring of procs + + int next = me + 1; + int prev = me -1; + if (next == nprocs) next = 0; + if (prev < 0) prev = nprocs - 1; + // ----------------------------------------------------- // allocate arrays for self (1d) and bond partners (2d) // max = max # of bond partners for owned atoms = 2nd dim of partner arrays @@ -761,10 +755,6 @@ void FixShake::find_clusters() memory->create(partner_shake,nlocal,max,"shake:partner_shake"); memory->create(partner_nshake,nlocal,max,"shake:partner_nshake"); - // setup atomIDs and procowner vectors in rendezvous decomposition - - atom_owners(); - // ----------------------------------------------------- // set npartner and partner_tag from special arrays // ----------------------------------------------------- @@ -788,13 +778,86 @@ void FixShake::find_clusters() } // ----------------------------------------------------- - // set partner_mask, partner_type, partner_massflag, - // partner_bondtype for all my bonded partners - // requires rendezvous communication for off-proc partners + // set partner_mask, partner_type, partner_massflag, partner_bondtype + // for bonded partners + // requires communication for off-proc partners // ----------------------------------------------------- - partner_info(npartner,partner_tag,partner_mask,partner_type, - partner_massflag,partner_bondtype); + // fill in mask, type, massflag, bondtype if own bond partner + // info to store in buf for each off-proc bond = nper = 6 + // 2 atoms IDs in bond, space for mask, type, massflag, bondtype + // nbufmax = largest buffer needed to hold info from any proc + + int nper = 6; + + nbuf = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + partner_mask[i][j] = 0; + partner_type[i][j] = 0; + partner_massflag[i][j] = 0; + partner_bondtype[i][j] = 0; + + m = atom->map(partner_tag[i][j]); + if (m >= 0 && m < nlocal) { + partner_mask[i][j] = mask[m]; + partner_type[i][j] = type[m]; + if (nmass) { + if (rmass) massone = rmass[m]; + else massone = mass[type[m]]; + partner_massflag[i][j] = masscheck(massone); + } + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + else { + n = bondtype_findset(m,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + } + } else nbuf += nper; + } + } + + memory->create(buf,nbuf,"shake:buf"); + + // fill buffer with info + + size = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) { + buf[size] = tag[i]; + buf[size+1] = partner_tag[i][j]; + buf[size+2] = 0; + buf[size+3] = 0; + buf[size+4] = 0; + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) buf[size+5] = n; + else buf[size+5] = 0; + size += nper; + } + } + } + + // cycle buffer around ring of procs back to self + + comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this); + + // store partner info returned to me + + m = 0; + while (m < size) { + i = atom->map(buf[m]); + for (j = 0; j < npartner[i]; j++) + if (buf[m+1] == partner_tag[i][j]) break; + partner_mask[i][j] = buf[m+2]; + partner_type[i][j] = buf[m+3]; + partner_massflag[i][j] = buf[m+4]; + partner_bondtype[i][j] = buf[m+5]; + m += nper; + } + + memory->destroy(buf); // error check for unfilled partner info // if partner_type not set, is an error @@ -805,18 +868,17 @@ void FixShake::find_clusters() // else it's an error flag = 0; - int flag2 = 0; for (i = 0; i < nlocal; i++) for (j = 0; j < npartner[i]; j++) { - if (partner_type[i][j] == 0) flag++; + if (partner_type[i][j] == 0) flag = 1; if (!(mask[i] & groupbit)) continue; if (!(partner_mask[i][j] & groupbit)) continue; - if (partner_bondtype[i][j] == 0) flag2++; + if (partner_bondtype[i][j] == 0) flag = 1; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Did not find fix shake partner info"); - + // ----------------------------------------------------- // identify SHAKEable bonds // set nshake[i] = # of SHAKE bonds attached to atom i @@ -869,11 +931,56 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set partner_nshake for bonded partners - // requires rendezvous communication for off-proc partners + // requires communication for off-proc partners // ----------------------------------------------------- - nshake_info(npartner,partner_tag,partner_nshake); - + // fill in partner_nshake if own bond partner + // info to store in buf for each off-proc bond = + // 2 atoms IDs in bond, space for nshake value + // nbufmax = largest buffer needed to hold info from any proc + + nbuf = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; + else nbuf += 3; + } + } + + memory->create(buf,nbuf,"shake:buf"); + + // fill buffer with info + + size = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) { + buf[size] = tag[i]; + buf[size+1] = partner_tag[i][j]; + size += 3; + } + } + } + + // cycle buffer around ring of procs back to self + + comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this); + + // store partner info returned to me + + m = 0; + while (m < size) { + i = atom->map(buf[m]); + for (j = 0; j < npartner[i]; j++) + if (buf[m+1] == partner_tag[i][j]) break; + partner_nshake[i][j] = buf[m+2]; + m += 3; + } + + memory->destroy(buf); + // ----------------------------------------------------- // error checks // no atom with nshake > 3 @@ -881,7 +988,7 @@ void FixShake::find_clusters() // ----------------------------------------------------- flag = 0; - for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag++; + for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms"); @@ -889,7 +996,7 @@ void FixShake::find_clusters() for (i = 0; i < nlocal; i++) { if (nshake[i] <= 1) continue; for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag++; + if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake clusters are connected"); @@ -957,18 +1064,68 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set shake_flag,shake_atom,shake_type for non-central atoms - // requires rendezvous communication for off-proc atoms + // requires communication for off-proc atoms // ----------------------------------------------------- - shake_info(npartner,partner_tag,partner_shake); + // fill in shake arrays for each bond partner I own + // info to store in buf for each off-proc bond = + // all values from shake_flag, shake_atom, shake_type + // nbufmax = largest buffer needed to hold info from any proc + + nbuf = 0; + for (i = 0; i < nlocal; i++) { + if (shake_flag[i] == 0) continue; + for (j = 0; j < npartner[i]; j++) { + if (partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); + if (m >= 0 && m < nlocal) { + shake_flag[m] = shake_flag[i]; + shake_atom[m][0] = shake_atom[i][0]; + shake_atom[m][1] = shake_atom[i][1]; + shake_atom[m][2] = shake_atom[i][2]; + shake_atom[m][3] = shake_atom[i][3]; + shake_type[m][0] = shake_type[i][0]; + shake_type[m][1] = shake_type[i][1]; + shake_type[m][2] = shake_type[i][2]; + } else nbuf += 9; + } + } + + memory->create(buf,nbuf,"shake:buf"); + + // fill buffer with info + + size = 0; + for (i = 0; i < nlocal; i++) { + if (shake_flag[i] == 0) continue; + for (j = 0; j < npartner[i]; j++) { + if (partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) { + buf[size] = partner_tag[i][j]; + buf[size+1] = shake_flag[i]; + buf[size+2] = shake_atom[i][0]; + buf[size+3] = shake_atom[i][1]; + buf[size+4] = shake_atom[i][2]; + buf[size+5] = shake_atom[i][3]; + buf[size+6] = shake_type[i][0]; + buf[size+7] = shake_type[i][1]; + buf[size+8] = shake_type[i][2]; + size += 9; + } + } + } + + // cycle buffer around ring of procs back to self + + comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this); + + memory->destroy(buf); // ----------------------------------------------------- // free local memory // ----------------------------------------------------- - memory->destroy(atomIDs); - memory->destroy(procowner); - memory->destroy(npartner); memory->destroy(nshake); memory->destroy(partner_tag); @@ -1042,551 +1199,98 @@ void FixShake::find_clusters() } /* ---------------------------------------------------------------------- - setup atomIDs and procowner + when receive buffer, scan bond partner IDs for atoms I own + if I own partner: + fill in mask and type and massflag + search for bond with 1st atom and fill in bondtype ------------------------------------------------------------------------- */ -void FixShake::atom_owners() +void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr) { - tagint *tag = atom->tag; - int nlocal = atom->nlocal; - - int *proclist; - memory->create(proclist,nlocal,"shake:proclist"); - IDRvous *idbuf = (IDRvous *) - memory->smalloc((bigint) nlocal*sizeof(IDRvous),"shake:idbuf"); - - // setup input buf to rendezvous comm - // input datums = pairs of bonded atoms - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - - for (int i = 0; i < nlocal; i++) { - proclist[i] = tag[i] % nprocs; - idbuf[i].me = me; - idbuf[i].atomID = tag[i]; - } - - // perform rendezvous operation - // each proc assigned every 1/Pth atom - - char *buf; - comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous), - 0,proclist, - rendezvous_ids,0,buf,0,(void *) this); - - memory->destroy(proclist); - memory->sfree(idbuf); -} - -/* ---------------------------------------------------------------------- - setup partner_mask, partner_type, partner_massflag, partner_bondtype -------------------------------------------------------------------------- */ - -void FixShake::partner_info(int *npartner, tagint **partner_tag, - int **partner_mask, int **partner_type, - int **partner_massflag, int **partner_bondtype) -{ - int i,j,m,n; - int nlocal = atom->nlocal; - - // nsend = # of my datums to send - // one datum for every off-processor partner - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PartnerInfo *inbuf = (PartnerInfo *) - memory->smalloc((bigint) nsend*sizeof(PartnerInfo),"special:inbuf"); - - // set values in 4 partner arrays for all partner atoms I own - // also setup input buf to rendezvous comm - // input datums = pair of bonded atoms where I do not own partner - // owning proc for each datum = partner_tag % nprocs - // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) - // 4 values for my owned atom - + FixShake *fsptr = (FixShake *)ptr; + Atom *atom = fsptr->atom; double *rmass = atom->rmass; double *mass = atom->mass; - int *type = atom->type; int *mask = atom->mask; - tagint *tag = atom->tag; - - double massone; - - nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - partner_mask[i][j] = 0; - partner_type[i][j] = 0; - partner_massflag[i][j] = 0; - partner_bondtype[i][j] = 0; - - m = atom->map(partner_tag[i][j]); - - if (m >= 0 && m < nlocal) { - partner_mask[i][j] = mask[m]; - partner_type[i][j] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; - else massone = mass[type[m]]; - partner_massflag[i][j] = masscheck(massone); - } - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - else { - n = bondtype_findset(m,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - } - - } else { - proclist[nsend] = partner_tag[i][j] % nprocs; - inbuf[nsend].atomID = partner_tag[i][j]; - inbuf[nsend].partnerID = tag[i]; - inbuf[nsend].mask = mask[i]; - inbuf[nsend].type = type[i]; - if (nmass) { - if (rmass) massone = rmass[i]; - else massone = mass[type[i]]; - inbuf[nsend].massflag = masscheck(massone); - } else inbuf[nsend].massflag = 0; - - // my atom may own bond, in which case set partner_bondtype - // else receiver of this datum will own the bond and return the value - - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) { - partner_bondtype[i][j] = n; - inbuf[nsend].bondtype = n; - } else inbuf[nsend].bondtype = 0; - - nsend++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all data needed to populate un-owned partner 4 values - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PartnerInfo), - 0,proclist, - rendezvous_partners_info, - 0,buf,sizeof(PartnerInfo), - (void *) this); - PartnerInfo *outbuf = (PartnerInfo *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set partner 4 values for un-onwed partners based on output info - // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - for (j = 0; j < npartner[i]; j++) - if (partner_tag[i][j] == outbuf[m].partnerID) break; - partner_mask[i][j] = outbuf[m].mask; - partner_type[i][j] = outbuf[m].type; - partner_massflag[i][j] = outbuf[m].massflag; - - // only set partner_bondtype if my atom did not set it - // when setting up rendezvous - // if this proc set it, then sender of this datum set outbuf.bondtype = 0 - - if (partner_bondtype[i][j] == 0) - partner_bondtype[i][j] = outbuf[m].bondtype; - } - - memory->sfree(outbuf); -} - -/* ---------------------------------------------------------------------- - setup partner_nshake -------------------------------------------------------------------------- */ - -void FixShake::nshake_info(int *npartner, tagint **partner_tag, - int **partner_nshake) -{ - int i,j,m,n; + int *type = atom->type; int nlocal = atom->nlocal; - - // nsend = # of my datums to send - // one datum for every off-processor partner - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - NShakeInfo *inbuf = (NShakeInfo *) - memory->smalloc((bigint) nsend*sizeof(NShakeInfo),"special:inbuf"); - - // set partner_nshake for all partner atoms I own - // also setup input buf to rendezvous comm - // input datums = pair of bonded atoms where I do not own partner - // owning proc for each datum = partner_tag % nprocs - // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) - // nshake value for my owned atom - - tagint *tag = atom->tag; - - nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - partner_nshake[i][j] = 0; - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { - partner_nshake[i][j] = nshake[m]; - } else { - proclist[nsend] = partner_tag[i][j] % nprocs; - inbuf[nsend].atomID = partner_tag[i][j]; - inbuf[nsend].partnerID = tag[i]; - inbuf[nsend].nshake = nshake[i]; - nsend++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all data needed to populate un-owned partner nshake - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo), - 0,proclist, - rendezvous_nshake,0,buf,sizeof(NShakeInfo), - (void *) this); - NShakeInfo *outbuf = (NShakeInfo *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set partner nshake for un-onwed partners based on output info - // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - for (j = 0; j < npartner[i]; j++) - if (partner_tag[i][j] == outbuf[m].partnerID) break; - partner_nshake[i][j] = outbuf[m].nshake; - } - - memory->sfree(outbuf); -} - -/* ---------------------------------------------------------------------- - setup shake_flag, shake_atom, shake_type -------------------------------------------------------------------------- */ - -void FixShake::shake_info(int *npartner, tagint **partner_tag, - int **partner_shake) -{ - int i,j,m,n; - int nlocal = atom->nlocal; - - // nsend = # of my datums to send - // one datum for every off-processor partner - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - ShakeInfo *inbuf = (ShakeInfo *) - memory->smalloc((bigint) nsend*sizeof(ShakeInfo),"special:inbuf"); - - // set 3 shake arrays for all partner atoms I own - // also setup input buf to rendezvous comm - // input datums = partner atom where I do not own partner - // owning proc for each datum = partner_tag % nprocs - // datum: atomID = partner_tag (off-proc) - // values in 3 shake arrays - - nsend = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; - m = atom->map(partner_tag[i][j]); - - if (m >= 0 && m < nlocal) { - shake_flag[m] = shake_flag[i]; - shake_atom[m][0] = shake_atom[i][0]; - shake_atom[m][1] = shake_atom[i][1]; - shake_atom[m][2] = shake_atom[i][2]; - shake_atom[m][3] = shake_atom[i][3]; - shake_type[m][0] = shake_type[i][0]; - shake_type[m][1] = shake_type[i][1]; - shake_type[m][2] = shake_type[i][2]; - - } else { - proclist[nsend] = partner_tag[i][j] % nprocs; - inbuf[nsend].atomID = partner_tag[i][j]; - inbuf[nsend].shake_flag = shake_flag[i]; - inbuf[nsend].shake_atom[0] = shake_atom[i][0]; - inbuf[nsend].shake_atom[1] = shake_atom[i][1]; - inbuf[nsend].shake_atom[2] = shake_atom[i][2]; - inbuf[nsend].shake_atom[3] = shake_atom[i][3]; - inbuf[nsend].shake_type[0] = shake_type[i][0]; - inbuf[nsend].shake_type[1] = shake_type[i][1]; - inbuf[nsend].shake_type[2] = shake_type[i][2]; - nsend++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all data needed to populate un-owned shake info - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo), - 0,proclist, - rendezvous_shake,0,buf,sizeof(ShakeInfo), - (void *) this); - ShakeInfo *outbuf = (ShakeInfo *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set shake info for un-onwed partners based on output info - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - shake_flag[i] = outbuf[m].shake_flag; - shake_atom[i][0] = outbuf[m].shake_atom[0]; - shake_atom[i][1] = outbuf[m].shake_atom[1]; - shake_atom[i][2] = outbuf[m].shake_atom[2]; - shake_atom[i][3] = outbuf[m].shake_atom[3]; - shake_type[i][0] = outbuf[m].shake_type[0]; - shake_type[i][1] = outbuf[m].shake_type[1]; - shake_type[i][2] = outbuf[m].shake_type[2]; - } - - memory->sfree(outbuf); -} - -/* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N IDRvous datums - no outbuf -------------------------------------------------------------------------- */ - -int FixShake::rendezvous_ids(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) -{ - FixShake *fsptr = (FixShake *) ptr; - Memory *memory = fsptr->memory; - - tagint *atomIDs; - int *procowner; - - memory->create(atomIDs,n,"special:atomIDs"); - memory->create(procowner,n,"special:procowner"); - - IDRvous *in = (IDRvous *) inbuf; - - for (int i = 0; i < n; i++) { - atomIDs[i] = in[i].atomID; - procowner[i] = in[i].me; - } - - // store rendezvous data in FixShake class - - fsptr->nrvous = n; - fsptr->atomIDs = atomIDs; - fsptr->procowner = procowner; - - // flag = 0: no second comm needed in rendezvous - - flag = 0; - return 0; -} - -/* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N PairRvous datums - outbuf = same list of N PairRvous datums, routed to different procs -------------------------------------------------------------------------- */ - -int FixShake::rendezvous_partners_info(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) -{ - int i,m; - - FixShake *fsptr = (FixShake *) ptr; - Atom *atom = fsptr->atom; - Memory *memory = fsptr->memory; - - // clear atom map so it can be here as a hash table - // faster than an STL map for large atom counts - - atom->map_clear(); - - // hash atom IDs stored in rendezvous decomposition - - int nrvous = fsptr->nrvous; - tagint *atomIDs = fsptr->atomIDs; - - for (i = 0; i < nrvous; i++) - atom->map_one(atomIDs[i],i); - - // proclist = owner of atomID in caller decomposition - // outbuf = info about owned atomID = 4 values - - PartnerInfo *in = (PartnerInfo *) inbuf; - int *procowner = fsptr->procowner; - memory->create(proclist,n,"shake:proclist"); - - double massone; int nmass = fsptr->nmass; - for (i = 0; i < n; i++) { - m = atom->map(in[i].atomID); - proclist[i] = procowner[m]; + tagint *buf = (tagint *) cbuf; + int m,n; + double massone; + + for (int i = 0; i < ndatum; i += 6) { + m = atom->map(buf[i+1]); + if (m >= 0 && m < nlocal) { + buf[i+2] = mask[m]; + buf[i+3] = type[m]; + if (nmass) { + if (rmass) massone = rmass[m]; + else massone = mass[type[m]]; + buf[i+4] = fsptr->masscheck(massone); + } + if (buf[i+5] == 0) { + n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0); + if (n) buf[i+5] = n; + } + } } - - outbuf = inbuf; - - // re-create atom map - - atom->map_init(0); - atom->nghost = 0; - atom->map_set(); - - // flag = 1: outbuf = inbuf - - flag = 1; - return n; } /* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N NShakeInfo datums - outbuf = same list of N NShakeInfo datums, routed to different procs + when receive buffer, scan bond partner IDs for atoms I own + if I own partner, fill in nshake value ------------------------------------------------------------------------- */ -int FixShake::rendezvous_nshake(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) +void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr) { - int i,j,m; - - FixShake *fsptr = (FixShake *) ptr; + FixShake *fsptr = (FixShake *)ptr; Atom *atom = fsptr->atom; - Memory *memory = fsptr->memory; + int nlocal = atom->nlocal; - // clear atom map so it can be here as a hash table - // faster than an STL map for large atom counts + int *nshake = fsptr->nshake; - atom->map_clear(); + tagint *buf = (tagint *) cbuf; + int m; - // hash atom IDs stored in rendezvous decomposition - - int nrvous = fsptr->nrvous; - tagint *atomIDs = fsptr->atomIDs; - - for (i = 0; i < nrvous; i++) - atom->map_one(atomIDs[i],i); - - // proclist = owner of atomID in caller decomposition - // outbuf = info about owned atomID - - NShakeInfo *in = (NShakeInfo *) inbuf; - int *procowner = fsptr->procowner; - memory->create(proclist,n,"shake:proclist"); - - for (i = 0; i < n; i++) { - m = atom->map(in[i].atomID); - proclist[i] = procowner[m]; + for (int i = 0; i < ndatum; i += 3) { + m = atom->map(buf[i+1]); + if (m >= 0 && m < nlocal) buf[i+2] = nshake[m]; } - - outbuf = inbuf; - - // re-create atom map - - atom->map_init(0); - atom->nghost = 0; - atom->map_set(); - - // flag = 1: outbuf = inbuf - - flag = 1; - return n; } + /* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N PairRvous datums - outbuf = same list of N PairRvous datums, routed to different procs + when receive buffer, scan bond partner IDs for atoms I own + if I own partner, fill in nshake value ------------------------------------------------------------------------- */ -int FixShake::rendezvous_shake(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) +void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr) { - int i,j,m; - - FixShake *fsptr = (FixShake *) ptr; + FixShake *fsptr = (FixShake *)ptr; Atom *atom = fsptr->atom; - Memory *memory = fsptr->memory; + int nlocal = atom->nlocal; - // clear atom map so it can be here as a hash table - // faster than an STL map for large atom counts + int *shake_flag = fsptr->shake_flag; + tagint **shake_atom = fsptr->shake_atom; + int **shake_type = fsptr->shake_type; - atom->map_clear(); + tagint *buf = (tagint *) cbuf; + int m; - // hash atom IDs stored in rendezvous decomposition - - int nrvous = fsptr->nrvous; - tagint *atomIDs = fsptr->atomIDs; - - for (i = 0; i < nrvous; i++) - atom->map_one(atomIDs[i],i); - - // proclist = owner of atomID in caller decomposition - // outbuf = info about owned atomID - - ShakeInfo *in = (ShakeInfo *) inbuf; - int *procowner = fsptr->procowner; - memory->create(proclist,n,"shake:proclist"); - - for (i = 0; i < n; i++) { - m = atom->map(in[i].atomID); - proclist[i] = procowner[m]; + for (int i = 0; i < ndatum; i += 9) { + m = atom->map(buf[i]); + if (m >= 0 && m < nlocal) { + shake_flag[m] = buf[i+1]; + shake_atom[m][0] = buf[i+2]; + shake_atom[m][1] = buf[i+3]; + shake_atom[m][2] = buf[i+4]; + shake_atom[m][3] = buf[i+5]; + shake_type[m][0] = buf[i+6]; + shake_type[m][1] = buf[i+7]; + shake_type[m][2] = buf[i+8]; + } } - - outbuf = inbuf; - - // re-create atom map - - atom->map_init(0); - atom->nghost = 0; - atom->map_set(); - - // flag = 1: outbuf = inbuf; - - flag = 1; - return n; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index b99a35f958..d4e7b85ec4 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -120,11 +120,6 @@ class FixShake : public Fix { int nmol; void find_clusters(); - void atom_owners(); - void partner_info(int *, tagint **, int **, int **, int **, int **); - void nshake_info(int *, tagint **, int **); - void shake_info(int *, tagint **, int **); - int masscheck(double); void unconstrained_update(); void unconstrained_update_respa(int); @@ -136,40 +131,12 @@ class FixShake : public Fix { int bondtype_findset(int, tagint, tagint, int); int angletype_findset(int, tagint, tagint, int); - // data used by rendezvous callback methods + // static variable for ring communication callback to access class data + // callback functions for ring communication - int nrvous; - tagint *atomIDs; - int *procowner; - - struct IDRvous { - int me; - tagint atomID; - }; - - struct PartnerInfo { - tagint atomID,partnerID; - int mask,type,massflag,bondtype; - }; - - struct NShakeInfo { - tagint atomID,partnerID; - int nshake; - }; - - struct ShakeInfo { - tagint atomID; - tagint shake_atom[4]; - int shake_flag; - int shake_type[3]; - }; - - // callback functions for rendezvous communication - - static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); - static int rendezvous_partners_info(int, char *, int &, int *&, char *&, void *); - static int rendezvous_nshake(int, char *, int &, int *&, char *&, void *); - static int rendezvous_shake(int, char *, int &, int *&, char *&, void *); + static void ring_bonds(int, char *, void *); + static void ring_nshake(int, char *, void *); + static void ring_shake(int, char *, void *); }; } diff --git a/src/comm.cpp b/src/comm.cpp index 729f96581a..f355a562fc 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -28,7 +28,6 @@ #include "dump.h" #include "group.h" #include "procmap.h" -#include "irregular.h" #include "accelerator_kokkos.h" #include "memory.h" #include "error.h" @@ -726,433 +725,6 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, memory->destroy(bufcopy); } -/* ---------------------------------------------------------------------- - rendezvous communication operation - three stages: - first comm sends inbuf from caller decomp to rvous decomp - callback operates on data in rendevous decomp - second comm sends outbuf from rvous decomp back to caller decomp - inputs: - which = perform (0) irregular or (1) MPI_All2allv communication - n = # of datums in inbuf - inbuf = vector of input datums - insize = byte size of each input datum - inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc - procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc, - callback = caller function to invoke in rendezvous decomposition - takes input datums, returns output datums - outorder = same as inorder, but for datums returned by callback() - ptr = pointer to caller class, passed to callback() - outputs: - nout = # of output datums (function return) - outbuf = vector of output datums - outsize = byte size of each output datum - callback inputs: - nrvous = # of rvous decomp datums in inbuf_rvous - inbuf_rvous = vector of rvous decomp input datums - ptr = pointer to caller class - callback outputs: - nrvous_out = # of rvous decomp output datums (function return) - flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous, - 2 for second comm with new outbuf_rvous - procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc - allocated - outbuf_rvous = vector of rvous decomp output datums - NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR - to avoid checked-for overflow in MPI_Alltoallv? -------------------------------------------------------------------------- */ - -int Comm:: -rendezvous(int which, int n, char *inbuf, int insize, - int inorder, int *procs, - int (*callback)(int, char *, int &, int *&, char *&, void *), - int outorder, char *&outbuf, int outsize, void *ptr, int statflag) -{ - if (which == 0) - return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, - outorder,outbuf,outsize,ptr,statflag); - else - return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, - outorder,outbuf,outsize,ptr,statflag); -} - -/* ---------------------------------------------------------------------- */ - -int Comm:: -rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, - int (*callback)(int, char *, int &, int *&, char *&, void *), - int outorder, char *&outbuf, - int outsize, void *ptr, int statflag) -{ - // irregular comm of inbuf from caller decomp to rendezvous decomp - - Irregular *irregular = new Irregular(lmp); - - int nrvous; - if (inorder) nrvous = irregular->create_data_grouped(n,procs); - else nrvous = irregular->create_data(n,procs); - - char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, - "rendezvous:inbuf"); - irregular->exchange_data(inbuf,insize,inbuf_rvous); - - bigint irregular1_bytes = irregular->memory_usage(); - irregular->destroy_data(); - delete irregular; - - // peform rendezvous computation via callback() - // callback() allocates/populates proclist_rvous and outbuf_rvous - - int flag; - int *procs_rvous; - char *outbuf_rvous; - int nrvous_out = callback(nrvous,inbuf_rvous,flag, - procs_rvous,outbuf_rvous,ptr); - - if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous - if (flag == 0) { - if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, - (bigint) nrvous_out*sizeof(int) + - irregular1_bytes); - return 0; // all nout_rvous are 0, no 2nd comm stage - } - - // irregular comm of outbuf from rendezvous decomp back to caller decomp - // caller will free outbuf - - irregular = new Irregular(lmp); - - int nout; - if (outorder) - nout = irregular->create_data_grouped(nrvous_out,procs_rvous); - else nout = irregular->create_data(nrvous_out,procs_rvous); - - outbuf = (char *) memory->smalloc((bigint) nout*outsize, - "rendezvous:outbuf"); - irregular->exchange_data(outbuf_rvous,outsize,outbuf); - - bigint irregular2_bytes = irregular->memory_usage(); - irregular->destroy_data(); - delete irregular; - - memory->destroy(procs_rvous); - memory->sfree(outbuf_rvous); - - // return number of output datums - // last arg to stats() = memory for procs_rvous + irregular comm - - if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, - (bigint) nrvous_out*sizeof(int) + - MAX(irregular1_bytes,irregular2_bytes)); - return nout; -} - -/* ---------------------------------------------------------------------- */ - -int Comm:: -rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, - int (*callback)(int, char *, int &, int *&, char *&, void *), - int outorder, char *&outbuf, int outsize, void *ptr, - int statflag) -{ - int iproc; - bigint all2all1_bytes,all2all2_bytes; - int *sendcount,*sdispls,*recvcount,*rdispls; - int *procs_a2a; - bigint *offsets; - char *inbuf_a2a,*outbuf_a2a; - - // create procs and inbuf for All2all if necesary - - if (!inorder) { - memory->create(procs_a2a,nprocs,"rendezvous:procs"); - inbuf_a2a = (char *) memory->smalloc((bigint) n*insize, - "rendezvous:inbuf"); - memory->create(offsets,nprocs,"rendezvous:offsets"); - - for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; - for (int i = 0; i < n; i++) procs_a2a[procs[i]]++; - - offsets[0] = 0; - for (int i = 1; i < nprocs; i++) - offsets[i] = offsets[i-1] + insize*procs_a2a[i-1]; - - bigint offset = 0; - for (int i = 0; i < n; i++) { - iproc = procs[i]; - memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize); - offsets[iproc] += insize; - offset += insize; - } - - all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize; - - } else { - procs_a2a = procs; - inbuf_a2a = inbuf; - all2all1_bytes = 0; - } - - // create args for MPI_Alltoallv() on input data - - memory->create(sendcount,nprocs,"rendezvous:sendcount"); - memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); - - memory->create(recvcount,nprocs,"rendezvous:recvcount"); - MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); - - memory->create(sdispls,nprocs,"rendezvous:sdispls"); - memory->create(rdispls,nprocs,"rendezvous:rdispls"); - sdispls[0] = rdispls[0] = 0; - for (int i = 1; i < nprocs; i++) { - sdispls[i] = sdispls[i-1] + sendcount[i-1]; - rdispls[i] = rdispls[i-1] + recvcount[i-1]; - } - int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1]; - - // test for overflow of input data due to imbalance or insize - // means that individual sdispls or rdispls values overflow - - int overflow = 0; - if ((bigint) n*insize > MAXSMALLINT) overflow = 1; - if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1; - int overflowall; - MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); - if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a"); - - for (int i = 0; i < nprocs; i++) { - sendcount[i] *= insize; - sdispls[i] *= insize; - recvcount[i] *= insize; - rdispls[i] *= insize; - } - - // all2all comm of inbuf from caller decomp to rendezvous decomp - - char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, - "rendezvous:inbuf"); - - MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR, - inbuf_rvous,recvcount,rdispls,MPI_CHAR,world); - - if (!inorder) { - memory->destroy(procs_a2a); - memory->sfree(inbuf_a2a); - memory->destroy(offsets); - } - - // peform rendezvous computation via callback() - // callback() allocates/populates proclist_rvous and outbuf_rvous - - int flag; - int *procs_rvous; - char *outbuf_rvous; - - int nrvous_out = callback(nrvous,inbuf_rvous,flag, - procs_rvous,outbuf_rvous,ptr); - - if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous - if (flag == 0) { - memory->destroy(sendcount); - memory->destroy(recvcount); - memory->destroy(sdispls); - memory->destroy(rdispls); - if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, - (bigint) nrvous_out*sizeof(int) + - 4*nprocs*sizeof(int) + all2all1_bytes); - return 0; // all nout_rvous are 0, no 2nd irregular - } - - - - - - - // create procs and outbuf for All2all if necesary - - if (!outorder) { - memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs"); - - outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize, - "rendezvous:outbuf"); - memory->create(offsets,nprocs,"rendezvous:offsets"); - - for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; - for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++; - - offsets[0] = 0; - for (int i = 1; i < nprocs; i++) - offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1]; - - bigint offset = 0; - for (int i = 0; i < nrvous_out; i++) { - iproc = procs_rvous[i]; - memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize); - offsets[iproc] += outsize; - offset += outsize; - } - - all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + - nrvous_out*outsize; - - } else { - procs_a2a = procs_rvous; - outbuf_a2a = outbuf_rvous; - all2all2_bytes = 0; - } - - // comm outbuf from rendezvous decomposition back to caller - - memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); - - MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); - - sdispls[0] = rdispls[0] = 0; - for (int i = 1; i < nprocs; i++) { - sdispls[i] = sdispls[i-1] + sendcount[i-1]; - rdispls[i] = rdispls[i-1] + recvcount[i-1]; - } - int nout = rdispls[nprocs-1] + recvcount[nprocs-1]; - - // test for overflow of outbuf due to imbalance or outsize - // means that individual sdispls or rdispls values overflow - - overflow = 0; - if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1; - if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1; - MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); - if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a"); - - for (int i = 0; i < nprocs; i++) { - sendcount[i] *= outsize; - sdispls[i] *= outsize; - recvcount[i] *= outsize; - rdispls[i] *= outsize; - } - - // all2all comm of outbuf from rendezvous decomp back to caller decomp - // caller will free outbuf - - outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); - - MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR, - outbuf,recvcount,rdispls,MPI_CHAR,world); - - memory->destroy(procs_rvous); - memory->sfree(outbuf_rvous); - - if (!outorder) { - memory->destroy(procs_a2a); - memory->sfree(outbuf_a2a); - memory->destroy(offsets); - } - - // clean up - - memory->destroy(sendcount); - memory->destroy(recvcount); - memory->destroy(sdispls); - memory->destroy(rdispls); - - // return number of output datums - // last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops - - if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, - (bigint) nrvous_out*sizeof(int) + - 4*nprocs*sizeof(int) + - MAX(all2all1_bytes,all2all2_bytes)); - return nout; -} - -/* ---------------------------------------------------------------------- - print balance and memory info for rendezvous operation - useful for debugging -------------------------------------------------------------------------- */ - -void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, - int insize, int outsize, bigint commsize) -{ - bigint size_in_all,size_in_max,size_in_min; - bigint size_out_all,size_out_max,size_out_min; - bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; - bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; - bigint size_comm_all,size_comm_max,size_comm_min; - - bigint size = (bigint) n*insize; - MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nout*outsize; - MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nrvous*insize; - MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nrvous_out*insize; - MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = commsize; - MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - int mbytes = 1024*1024; - - if (me == 0) { - if (screen) { - fprintf(screen,"Rendezvous balance and memory info: (tot,ave,max,min) \n"); - fprintf(screen," input datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_in_all/insize,1.0*size_in_all/nprocs/insize, - size_in_max/insize,size_in_min/insize); - fprintf(screen," input data (MB): %g %g %g %g\n", - 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, - 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); - if (outsize) - fprintf(screen," output datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_out_all/outsize,1.0*size_out_all/nprocs/outsize, - size_out_max/outsize,size_out_min/outsize); - else - fprintf(screen," output datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - 0,0.0,0,0); - fprintf(screen," output data (MB): %g %g %g %g\n", - 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, - 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); - fprintf(screen," input rvous datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, - size_inrvous_max/insize,size_inrvous_min/insize); - fprintf(screen," input rvous data (MB): %g %g %g %g\n", - 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, - 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); - if (outsize) - fprintf(screen," output rvous datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, - size_outrvous_max/outsize,size_outrvous_min/outsize); - else - fprintf(screen," output rvous datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - 0,0.0,0,0); - fprintf(screen," output rvous data (MB): %g %g %g %g\n", - 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, - 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); - fprintf(screen," rvous comm (MB): %g %g %g %g\n", - 1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes, - 1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes); - } - } -} - /* ---------------------------------------------------------------------- proc 0 reads Nlines from file into buf and bcasts buf to all procs caller allocates buf to max size needed diff --git a/src/comm.h b/src/comm.h index 9c0112b4c4..2579f9b283 100644 --- a/src/comm.h +++ b/src/comm.h @@ -109,10 +109,6 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); - int rendezvous(int, int, char *, int, int, int *, - int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *, int statflag=0); - int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); @@ -146,15 +142,6 @@ class Comm : protected Pointers { int ncores; // # of cores per node int coregrid[3]; // 3d grid of cores within a node int user_coregrid[3]; // user request for cores in each dim - - int rendezvous_irregular(int, char *, int, int, int *, - int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *, int); - int rendezvous_all2all(int, char *, int, int, int *, - int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *, int); - void rendezvous_stats(int, int, int, int, int, int, bigint); - public: enum{MULTIPLE}; }; diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index 52e4256fca..cb0da42970 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -514,6 +514,9 @@ void CreateAtoms::command(int narg, char **arg) if (domain->triclinic) domain->lamda2x(atom->nlocal); } + MPI_Barrier(world); + double time2 = MPI_Wtime(); + // clean up delete ranmol; @@ -523,6 +526,21 @@ void CreateAtoms::command(int narg, char **arg) delete [] ystr; delete [] zstr; + // print status + + if (comm->me == 0) { + if (screen) { + fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(screen," Time spent = %g secs\n",time2-time1); + } + if (logfile) { + fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(logfile," Time spent = %g secs\n",time2-time1); + } + } + // for MOLECULE mode: // create special bond lists for molecular systems, // but not for atom style template @@ -532,25 +550,6 @@ void CreateAtoms::command(int narg, char **arg) if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) { Special special(lmp); special.build(); - - } - } - - // print status - - MPI_Barrier(world); - double time2 = MPI_Wtime(); - - if (comm->me == 0) { - if (screen) { - fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(screen," create_atoms CPU = %g secs\n",time2-time1); - } - if (logfile) { - fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1); } } } diff --git a/src/hashlittle.cpp b/src/hashlittle.cpp deleted file mode 100644 index f1d4e61142..0000000000 --- a/src/hashlittle.cpp +++ /dev/null @@ -1,349 +0,0 @@ -// Hash function hashlittle() -// from lookup3.c, by Bob Jenkins, May 2006, Public Domain -// bob_jenkins@burtleburtle.net - -#include -#include -#include - -// if the system defines the __BYTE_ORDER__ define, -// we use it instead of guessing the platform - -#if defined(__BYTE_ORDER__) -# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define HASH_LITTLE_ENDIAN 1 -# else -# define HASH_LITTLE_ENDIAN 0 -# endif -#else // heuristic platform guess -# if defined(__bg__) -# define HASH_LITTLE_ENDIAN 0 // IBM BlueGene is big endian -# else -# define HASH_LITTLE_ENDIAN 1 // Intel and AMD x86 are little endian -# endif -#endif - -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - -/* -------------------------------------------------------------------------------- -mix -- mix 3 32-bit values reversibly. - -This is reversible, so any information in (a,b,c) before mix() is -still in (a,b,c) after mix(). - -If four pairs of (a,b,c) inputs are run through mix(), or through -mix() in reverse, there are at least 32 bits of the output that -are sometimes the same for one pair and different for another pair. -This was tested for: -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that -satisfy this are - 4 6 8 16 19 4 - 9 15 3 18 27 15 - 14 9 3 7 17 3 -Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing -for "differ" defined as + with a one-bit base and a two-bit delta. I -used http://burtleburtle.net/bob/hash/avalanche.html to choose -the operations, constants, and arrangements of the variables. - -This does not achieve avalanche. There are input bits of (a,b,c) -that fail to affect some output bits of (a,b,c), especially of a. The -most thoroughly mixed value is c, but it doesn't really even achieve -avalanche in c. - -This allows some parallelism. Read-after-writes are good at doubling -the number of bits affected, so the goal of mixing pulls in the opposite -direction as the goal of parallelism. I did what I could. Rotates -seem to cost as much as shifts on every machine I could lay my hands -on, and rotates are much kinder to the top and bottom bits, so I used -rotates. -------------------------------------------------------------------------------- -*/ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/* -------------------------------------------------------------------------------- -final -- final mixing of 3 32-bit values (a,b,c) into c - -Pairs of (a,b,c) values differing in only a few bits will usually -produce values of c that look totally different. This was tested for -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -These constants passed: - 14 11 25 16 4 14 24 - 12 14 25 16 4 14 24 -and these came close: - 4 8 15 26 3 22 24 - 10 8 15 26 3 22 24 - 11 8 15 26 3 22 24 -------------------------------------------------------------------------------- -*/ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -/* -------------------------------------------------------------------------------- -hashlittle() -- hash a variable-length key into a 32-bit value - k : the key (the unaligned variable-length array of bytes) - length : the length of the key, counting by bytes - initval : can be any 4-byte value -Returns a 32-bit value. Every bit of the key affects every bit of -the return value. Two keys differing by one or two bits will have -totally different hash values. - -The best hash table sizes are powers of 2. There is no need to do -mod a prime (mod is sooo slow!). If you need less than 32 bits, -use a bitmask. For example, if you need only 10 bits, do - h = (h & hashmask(10)); -In which case, the hash table should have hashsize(10) elements. - -If you are hashing n strings (uint8_t **)k, do it like this: - for (i=0, h=0; i 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). - */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : return c; - } - -#endif /* !valgrind */ - - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) - { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - /*----------------------------- handle the last (probably partial) block */ - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=k[4]; - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=k[0]; - break; - case 1 : a+=k8[0]; - break; - case 0 : return c; /* zero length requires no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9 : c+=k[8]; - case 8 : b+=((uint32_t)k[7])<<24; - case 7 : b+=((uint32_t)k[6])<<16; - case 6 : b+=((uint32_t)k[5])<<8; - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3])<<24; - case 3 : a+=((uint32_t)k[2])<<16; - case 2 : a+=((uint32_t)k[1])<<8; - case 1 : a+=k[0]; - break; - case 0 : return c; - } - } - - final(a,b,c); - return c; - -#else /* PURIFY_HATES_HASHLITTLE */ -/* I don't know what it is about Jenkins' hashlittle function, but - * it drives purify insane, even with VALGRIND defined. It makes - * purify unusable!! The code execution doesn't even make sense. - * Below is a (probably) weaker hash function that at least allows - * testing with purify. - */ -#define MAXINT_DIV_PHI 11400714819323198485U - - uint32_t h, rest, *p, bytes, num_bytes; - char *byteptr; - - num_bytes = length; - - /* First hash the uint32_t-sized portions of the key */ - h = 0; - for (p = (uint32_t *)key, bytes=num_bytes; - bytes >= (uint32_t) sizeof(uint32_t); - bytes-=sizeof(uint32_t), p++){ - h = (h^(*p))*MAXINT_DIV_PHI; - } - - /* Then take care of the remaining bytes, if any */ - rest = 0; - for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){ - rest = (rest<<8) | (*byteptr); - } - - /* If extra bytes, merge the two parts */ - if (rest) - h = (h^rest)*MAXINT_DIV_PHI; - - return h; -#endif /* PURIFY_HATES_HASHLITTLE */ -} diff --git a/src/hashlittle.h b/src/hashlittle.h deleted file mode 100644 index 7b57a35c80..0000000000 --- a/src/hashlittle.h +++ /dev/null @@ -1,5 +0,0 @@ -// Hash function hashlittle() -// from lookup3.c, by Bob Jenkins, May 2006, Public Domain -// bob_jenkins@burtleburtle.net - -uint32_t hashlittle(const void *key, size_t length, uint32_t); diff --git a/src/irregular.cpp b/src/irregular.cpp index c27a8c8e18..9c15f135d0 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -501,8 +501,7 @@ int compare_standalone(const int i, const int j, void *ptr) void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf) { - int i,m,n,count; - bigint offset; + int i,m,n,offset,count; // post all receives @@ -622,7 +621,6 @@ int Irregular::create_data(int n, int *proclist, int sortflag) num_send = new int[nsend_proc]; index_send = new int[n-work1[me]]; index_self = new int[work1[me]]; - maxindex = n; // proc_send = procs I send to // num_send = # of datums I send to each proc @@ -680,182 +678,8 @@ int Irregular::create_data(int n, int *proclist, int sortflag) // receive incoming messages // proc_recv = procs I recv from - // num_recv = # of datums each proc sends me - // nrecvdatum = total # of datums I recv - - int nrecvdatum = 0; - for (i = 0; i < nrecv_proc; i++) { - MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status); - proc_recv[i] = status->MPI_SOURCE; - nrecvdatum += num_recv[i]; - } - nrecvdatum += num_self; - - // sort proc_recv and num_recv by proc ID if requested - // useful for debugging to insure reproducible ordering of received datums - - if (sortflag) { - int *order = new int[nrecv_proc]; - int *proc_recv_ordered = new int[nrecv_proc]; - int *num_recv_ordered = new int[nrecv_proc]; - - for (i = 0; i < nrecv_proc; i++) order[i] = i; - -#if defined(LMP_QSORT) - proc_recv_copy = proc_recv; - qsort(order,nrecv_proc,sizeof(int),compare_standalone); -#else - merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); -#endif - - int j; - for (i = 0; i < nrecv_proc; i++) { - j = order[i]; - proc_recv_ordered[i] = proc_recv[j]; - num_recv_ordered[i] = num_recv[j]; - } - - memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int)); - memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int)); - delete [] order; - delete [] proc_recv_ordered; - delete [] num_recv_ordered; - } - - // barrier to insure all MPI_ANY_SOURCE messages are received - // else another proc could proceed to exchange_data() and send to me - - MPI_Barrier(world); - - // return # of datums I will receive - - return nrecvdatum; -} - -/* ---------------------------------------------------------------------- - create communication plan based on list of datums of uniform size - n = # of datums to send - procs = how many datums to send to each proc, must include self - sort = flag for sorting order of received messages by proc ID - return total # of datums I will recv, including any to self -------------------------------------------------------------------------- */ - -int Irregular::create_data_grouped(int n, int *procs, int sortflag) -{ - int i,j,k,m; - - // setup for collective comm - // work1 = # of datums I send to each proc, set self to 0 - // work2 = 1 for all procs, used for ReduceScatter - - for (i = 0; i < nprocs; i++) { - work1[i] = procs[i]; - work2[i] = 1; - } - work1[me] = 0; - - // nrecv_proc = # of procs I receive messages from, not including self - // options for performing ReduceScatter operation - // some are more efficient on some machines at big sizes - -#ifdef LAMMPS_RS_ALLREDUCE_INPLACE - MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world); - nrecv_proc = work1[me]; -#else -#ifdef LAMMPS_RS_ALLREDUCE - MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); - nrecv_proc = work2[me]; -#else - MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); -#endif -#endif - - // allocate receive arrays - - proc_recv = new int[nrecv_proc]; - num_recv = new int[nrecv_proc]; - request = new MPI_Request[nrecv_proc]; - status = new MPI_Status[nrecv_proc]; - - // work1 = # of datums I send to each proc, including self - // nsend_proc = # of procs I send messages to, not including self - - for (i = 0; i < nprocs; i++) work1[i] = procs[i]; - - nsend_proc = 0; - for (i = 0; i < nprocs; i++) - if (work1[i]) nsend_proc++; - if (work1[me]) nsend_proc--; - - // allocate send and self arrays - - proc_send = new int[nsend_proc]; - num_send = new int[nsend_proc]; - index_send = new int[n-work1[me]]; - index_self = new int[work1[me]]; - maxindex = n; - - // proc_send = procs I send to - // num_send = # of datums I send to each proc - // num_self = # of datums I copy to self - // to balance pattern of send messages: - // each proc begins with iproc > me, continues until iproc = me - // reset work1 to store which send message each proc corresponds to - - int iproc = me; - int isend = 0; - for (i = 0; i < nprocs; i++) { - iproc++; - if (iproc == nprocs) iproc = 0; - if (iproc == me) { - num_self = work1[iproc]; - work1[iproc] = 0; - } else if (work1[iproc] > 0) { - proc_send[isend] = iproc; - num_send[isend] = work1[iproc]; - work1[iproc] = isend; - isend++; - } - } - - // work2 = offsets into index_send for each proc I send to - // m = ptr into index_self - // index_send = list of which datums to send to each proc - // 1st N1 values are datum indices for 1st proc, - // next N2 values are datum indices for 2nd proc, etc - // index_self = list of which datums to copy to self - - work2[0] = 0; - for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1]; - - m = 0; - i = 0; - for (iproc = 0; iproc < nprocs; iproc++) { - k = procs[iproc]; - for (j = 0; j < k; j++) { - if (iproc == me) index_self[m++] = i++; - else { - isend = work1[iproc]; - index_send[work2[isend]++] = i++; - } - } - } - - // tell receivers how much data I send - // sendmax_proc = largest # of datums I send in a single message - - sendmax_proc = 0; - for (i = 0; i < nsend_proc; i++) { - MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock - MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq); - MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion - sendmax_proc = MAX(sendmax_proc,num_send[i]); - } - - // receive incoming messages - // proc_recv = procs I recv from - // num_recv = # of datums each proc sends me - // nrecvdatum = total # of datums I recv + // num_recv = total size of message each proc sends me + // nrecvdatum = total size of data I recv int nrecvdatum = 0; for (i = 0; i < nrecv_proc; i++) { @@ -915,13 +739,11 @@ int Irregular::create_data_grouped(int n, int *procs, int sortflag) void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) { - int i,n,count; - bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB - char *dest; + int i,m,n,offset,count; // post all receives, starting after self copies - bigint offset = num_self*nbytes; + offset = num_self*nbytes; for (int irecv = 0; irecv < nrecv_proc; irecv++) { MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR, proc_recv[irecv],0,world,&request[irecv]); @@ -943,34 +765,23 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) n = 0; for (int isend = 0; isend < nsend_proc; isend++) { count = num_send[isend]; - dest = buf; for (i = 0; i < count; i++) { m = index_send[n++]; - memcpy(dest,&sendbuf[m*nbytes],nbytes); - dest += nbytes; + memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes); } MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world); } // copy datums to self, put at beginning of recvbuf - dest = recvbuf; for (i = 0; i < num_self; i++) { m = index_self[i]; - memcpy(dest,&sendbuf[m*nbytes],nbytes); - dest += nbytes; + memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes); } // wait on all incoming messages if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); - - // approximate memory tally - // DEBUG lines - - //bigint irregular_bytes = 2*nprocs*sizeof(int); - //irregular_bytes += maxindex*sizeof(int); - //irregular_bytes += maxbuf; } /* ---------------------------------------------------------------------- diff --git a/src/irregular.h b/src/irregular.h index d56bcb253d..1f74fe801b 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -33,7 +33,6 @@ class Irregular : protected Pointers { int *procassign = NULL); int migrate_check(); int create_data(int, int *, int sortflag = 0); - int create_data_grouped(int, int *, int sortflag = 0); void exchange_data(char *, int, char *); void destroy_data(); bigint memory_usage(); @@ -49,7 +48,6 @@ class Irregular : protected Pointers { double *dbuf; // double buf for largest single atom send int maxbuf; // size of char buf in bytes char *buf; // char buf for largest single data send - int maxindex; // combined size of index_send + index_self int *mproclist,*msizes; // persistent vectors in migrate_atoms int maxlocal; // allocated size of mproclist and msizes diff --git a/src/read_data.cpp b/src/read_data.cpp index e2efbaaefa..2f4484010b 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -120,9 +120,6 @@ void ReadData::command(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal read_data command"); - MPI_Barrier(world); - double time1 = MPI_Wtime(); - // optional args addflag = NONE; @@ -909,18 +906,6 @@ void ReadData::command(int narg, char **arg) force->kspace = saved_kspace; } - - // total time - - MPI_Barrier(world); - double time2 = MPI_Wtime(); - - if (comm->me == 0) { - if (screen) - fprintf(screen," read_data CPU = %g secs\n",time2-time1); - if (logfile) - fprintf(logfile," read_data CPU = %g secs\n",time2-time1); - } } /* ---------------------------------------------------------------------- diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 5aa4622a67..73dc37f4cb 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -81,9 +81,6 @@ void ReadRestart::command(int narg, char **arg) if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); - MPI_Barrier(world); - double time1 = MPI_Wtime(); - MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -565,18 +562,6 @@ void ReadRestart::command(int narg, char **arg) Special special(lmp); special.build(); } - - // total time - - MPI_Barrier(world); - double time2 = MPI_Wtime(); - - if (comm->me == 0) { - if (screen) - fprintf(screen," read_restart CPU = %g secs\n",time2-time1); - if (logfile) - fprintf(logfile," read_restart CPU = %g secs\n",time2-time1); - } } /* ---------------------------------------------------------------------- diff --git a/src/replicate.cpp b/src/replicate.cpp index 3c8f4a8aee..cdadf1fd1f 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg) if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); - // record wall time for atom replication + // Record wall time for atom replication MPI_Barrier(world); double time1 = MPI_Wtime(); @@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg) special.build(); } - // total time + // Wall time MPI_Barrier(world); double time2 = MPI_Wtime(); if (me == 0) { if (screen) - fprintf(screen," replicate CPU = %g secs\n",time2-time1); + fprintf(screen," Time spent = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," replicate CPU = %g secs\n",time2-time1); + fprintf(logfile," Time spent = %g secs\n",time2-time1); } } diff --git a/src/special.cpp b/src/special.cpp index b9287df472..fccc930353 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -21,14 +21,12 @@ #include "modify.h" #include "fix.h" #include "accelerator_kokkos.h" -#include "atom_masks.h" #include "memory.h" #include "error.h" +#include "atom_masks.h" using namespace LAMMPS_NS; -#define RVOUS 1 // 0 for irregular, 1 for all2all - /* ---------------------------------------------------------------------- */ Special::Special(LAMMPS *lmp) : Pointers(lmp) @@ -56,8 +54,18 @@ Special::~Special() void Special::build() { + int i,j,k,size; + int max,maxall,nbuf; + tagint *buf; + MPI_Barrier(world); - double time1 = MPI_Wtime(); + + int nlocal = atom->nlocal; + + tagint *tag = atom->tag; + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **nspecial = atom->nspecial; if (me == 0 && screen) { const double * const special_lj = force->special_lj; @@ -71,57 +79,224 @@ void Special::build() // initialize nspecial counters to 0 - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { + for (i = 0; i < nlocal; i++) { nspecial[i][0] = 0; nspecial[i][1] = 0; nspecial[i][2] = 0; } - // setup atomIDs and procowner vectors in rendezvous decomposition + // ----------------------------------------------------- + // compute nspecial[i][0] = # of 1-2 neighbors of atom i + // ----------------------------------------------------- - atom_owners(); + // bond partners stored by atom itself - // tally nspecial[i][0] = # of 1-2 neighbors of atom i + for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i]; + + // if newton_bond off, then done + // else only counted 1/2 of all bonds, so count other half + + if (force->newton_bond) { + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = global tag of 2nd atom in each bond + + nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += num_bond[i]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with global tags of bond partners of my atoms + + size = 0; + for (i = 0; i < nlocal; i++) + for (j = 0; j < num_bond[i]; j++) + buf[size++] = bond_atom[i][j]; + + // cycle buffer around ring of procs back to self + // when receive buffer, scan tags for atoms I own + // when find one, increment nspecial count for that atom + + comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL,(void *)this); + + memory->destroy(buf); + } + + // ---------------------------------------------------- // create onetwo[i] = list of 1-2 neighbors for atom i + // ---------------------------------------------------- - if (force->newton_bond) onetwo_build_newton(); - else onetwo_build_newton_off(); + max = 0; + for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]); - // print max # of 1-2 neighbors + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-2 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-2 neighbors\n",maxall); } + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + // count = accumulating counter + + memory->create(count,nlocal,"special:count"); + for (i = 0; i < nlocal; i++) count[i] = 0; + + // add bond partners stored by atom to onetwo list + + for (i = 0; i < nlocal; i++) + for (j = 0; j < num_bond[i]; j++) + onetwo[i][count[i]++] = bond_atom[i][j]; + + // if newton_bond off, then done + // else only stored 1/2 of all bonds, so store other half + + if (force->newton_bond) { + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = 2 global tags in each bond + + nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with global tags of both atoms in bond + + size = 0; + for (i = 0; i < nlocal; i++) + for (j = 0; j < num_bond[i]; j++) { + buf[size++] = tag[i]; + buf[size++] = bond_atom[i][j]; + } + + // cycle buffer around ring of procs back to self + // when receive buffer, scan 2nd-atom tags for atoms I own + // when find one, add 1st-atom tag to onetwo list for 2nd atom + + comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL,(void *)this); + + memory->destroy(buf); + } + + memory->destroy(count); + + // ----------------------------------------------------- // done if special_bond weights for 1-3, 1-4 are set to 1.0 + // ----------------------------------------------------- if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0 && force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { dedup(); combine(); fix_alteration(); - memory->destroy(procowner); - memory->destroy(atomIDs); - timer_output(time1); return; } - // tally nspecial[i][1] = # of 1-3 neighbors of atom i + // ----------------------------------------------------- + // compute nspecial[i][1] = # of 1-3 neighbors of atom i + // ----------------------------------------------------- + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = 2 scalars + list of 1-2 neighbors + + nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with: + // (1) = counter for 1-3 neighbors, initialized to 0 + // (2) = # of 1-2 neighbors + // (3:N) = list of 1-2 neighbors + + size = 0; + for (i = 0; i < nlocal; i++) { + buf[size++] = 0; + buf[size++] = nspecial[i][0]; + for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; + } + + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1-2 neighbors for atoms I own + // when find one, increment 1-3 count by # of 1-2 neighbors of my atom, + // subtracting one since my list will contain original atom + + comm->ring(size,sizeof(tagint),buf,3,ring_three,buf,(void *)this); + + // extract count from buffer that has cycled back to me + // nspecial[i][1] = # of 1-3 neighbors of atom i + + j = 0; + for (i = 0; i < nlocal; i++) { + nspecial[i][1] = buf[j]; + j += 2 + nspecial[i][0]; + } + + memory->destroy(buf); + + // ---------------------------------------------------- // create onethree[i] = list of 1-3 neighbors for atom i + // ---------------------------------------------------- - onethree_build(); - - // print max # of 1-3 neighbors + max = 0; + for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-3 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-3 neighbors\n",maxall); } + memory->create(onethree,nlocal,maxall,"special:onethree"); + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs + + nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with: + // (1) = global tag of original atom + // (2) = # of 1-2 neighbors + // (3) = # of 1-3 neighbors + // (4) = counter for 1-3 neighbors, initialized to 0 + // (5:N) = list of 1-2 neighbors + // (N+1:2N) space for list of 1-3 neighbors + + size = 0; + for (i = 0; i < nlocal; i++) { + buf[size++] = tag[i]; + buf[size++] = nspecial[i][0]; + buf[size++] = nspecial[i][1]; + buf[size++] = 0; + for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; + size += nspecial[i][1]; + } + + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1-2 neighbors for atoms I own + // when find one, add its neighbors to 1-3 list + // increment the count in buf(i+4) + // exclude the atom whose tag = original + // this process may include duplicates but they will be culled later + + comm->ring(size,sizeof(tagint),buf,4,ring_four,buf,(void *)this); + + // fill onethree with buffer values that have been returned to me + // sanity check: accumulated buf[i+3] count should equal + // nspecial[i][1] for each atom + + j = 0; + for (i = 0; i < nlocal; i++) { + if (buf[j+3] != nspecial[i][1]) + error->one(FLERR,"1-3 bond count is inconsistent"); + j += 4 + nspecial[i][0]; + for (k = 0; k < nspecial[i][1]; k++) + onethree[i][k] = buf[j++]; + } + + memory->destroy(buf); + // done if special_bond weights for 1-4 are set to 1.0 if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { @@ -129,410 +304,117 @@ void Special::build() if (force->special_angle) angle_trim(); combine(); fix_alteration(); - memory->destroy(procowner); - memory->destroy(atomIDs); - timer_output(time1); return; } - // tally nspecial[i][2] = # of 1-4 neighbors of atom i + // ----------------------------------------------------- + // compute nspecial[i][2] = # of 1-4 neighbors of atom i + // ----------------------------------------------------- + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = 2 scalars + list of 1-3 neighbors + + nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with: + // (1) = counter for 1-4 neighbors, initialized to 0 + // (2) = # of 1-3 neighbors + // (3:N) = list of 1-3 neighbors + + size = 0; + for (i = 0; i < nlocal; i++) { + buf[size++] = 0; + buf[size++] = nspecial[i][1]; + for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; + } + + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1-3 neighbors for atoms I own + // when find one, increment 1-4 count by # of 1-2 neighbors of my atom + // may include duplicates and original atom but they will be culled later + + comm->ring(size,sizeof(tagint),buf,5,ring_five,buf,(void *)this); + + // extract count from buffer that has cycled back to me + // nspecial[i][2] = # of 1-4 neighbors of atom i + + j = 0; + for (i = 0; i < nlocal; i++) { + nspecial[i][2] = buf[j]; + j += 2 + nspecial[i][1]; + } + + memory->destroy(buf); + + // ---------------------------------------------------- // create onefour[i] = list of 1-4 neighbors for atom i + // ---------------------------------------------------- - onefour_build(); - - // print max # of 1-4 neighbors + max = 0; + for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-4 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-4 neighbors\n",maxall); } - // finish processing the onetwo, onethree, onefour lists + memory->create(onefour,nlocal,maxall,"special:onefour"); + + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs + + nbuf = 0; + for (i = 0; i < nlocal; i++) + nbuf += 3 + nspecial[i][1] + nspecial[i][2]; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with: + // (1) = # of 1-3 neighbors + // (2) = # of 1-4 neighbors + // (3) = counter for 1-4 neighbors, initialized to 0 + // (4:N) = list of 1-3 neighbors + // (N+1:2N) space for list of 1-4 neighbors + + size = 0; + for (i = 0; i < nlocal; i++) { + buf[size++] = nspecial[i][1]; + buf[size++] = nspecial[i][2]; + buf[size++] = 0; + for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; + size += nspecial[i][2]; + } + + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1-3 neighbors for atoms I own + // when find one, add its neighbors to 1-4 list + // incrementing the count in buf(i+4) + // this process may include duplicates but they will be culled later + + comm->ring(size,sizeof(tagint),buf,6,ring_six,buf,(void *)this); + + // fill onefour with buffer values that have been returned to me + // sanity check: accumulated buf[i+2] count should equal + // nspecial[i][2] for each atom + + j = 0; + for (i = 0; i < nlocal; i++) { + if (buf[j+2] != nspecial[i][2]) + error->one(FLERR,"1-4 bond count is inconsistent"); + j += 3 + nspecial[i][1]; + for (k = 0; k < nspecial[i][2]; k++) + onefour[i][k] = buf[j++]; + } + + memory->destroy(buf); dedup(); if (force->special_angle) angle_trim(); if (force->special_dihedral) dihedral_trim(); combine(); fix_alteration(); - memory->destroy(procowner); - memory->destroy(atomIDs); - - timer_output(time1); -} - -/* ---------------------------------------------------------------------- - setup atomIDs and procowner -------------------------------------------------------------------------- */ - -void Special::atom_owners() -{ - tagint *tag = atom->tag; - int nlocal = atom->nlocal; - - int *proclist; - memory->create(proclist,nlocal,"special:proclist"); - IDRvous *idbuf = (IDRvous *) - memory->smalloc((bigint) nlocal*sizeof(IDRvous),"special:idbuf"); - - // setup input buf for rendezvous comm - // one datum for each owned atom: datum = owning proc, atomID - // each proc assigned every 1/Pth atom - - for (int i = 0; i < nlocal; i++) { - proclist[i] = tag[i] % nprocs; - idbuf[i].me = me; - idbuf[i].atomID = tag[i]; - } - - // perform rendezvous operation - - char *buf; - comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, - rendezvous_ids,0,buf,0,(void *) this); - - memory->destroy(proclist); - memory->sfree(idbuf); -} - -/* ---------------------------------------------------------------------- - onetwo build when newton_bond flag on - uses rendezvous comm -------------------------------------------------------------------------- */ - -void Special::onetwo_build_newton() -{ - int i,j,m; - - tagint *tag = atom->tag; - int *num_bond = atom->num_bond; - tagint **bond_atom = atom->bond_atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - // nsend = # of my datums to send - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_bond[i]; j++) { - m = atom->map(bond_atom[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // one datum for each unowned bond partner: bond partner ID, atomID - // owning proc for each datum = bond partner ID % nprocs - - nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_bond[i]; j++) { - m = atom->map(bond_atom[i][j]); - if (m >= 0 && m < nlocal) continue; - proclist[nsend] = bond_atom[i][j] % nprocs; - inbuf[nsend].atomID = bond_atom[i][j]; - inbuf[nsend].partnerID = tag[i]; - nsend++; - } - } - - // perform rendezvous operation - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), - 0,proclist, - rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); - PairRvous *outbuf = (PairRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[0] and onetwo for all owned atoms - // based on owned info plus rendezvous output info - // output datums = pairs of atoms that are 1-2 neighbors - - for (i = 0; i < nlocal; i++) { - nspecial[i][0] = num_bond[i]; - for (j = 0; j < num_bond[i]; j++) { - m = atom->map(bond_atom[i][j]); - if (m >= 0 && m < nlocal) nspecial[m][0]++; - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - nspecial[i][0]++; - } - - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,nspecial[i][0]); - - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_bond[i]; j++) { - onetwo[i][nspecial[i][0]++] = bond_atom[i][j]; - m = atom->map(bond_atom[i][j]); - if (m >= 0 && m < nlocal) onetwo[m][nspecial[m][0]++] = tag[i]; - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; - } - - memory->sfree(outbuf); -} - -/* ---------------------------------------------------------------------- - onetwo build with newton_bond flag off - no need for rendezvous comm -------------------------------------------------------------------------- */ - -void Special::onetwo_build_newton_off() -{ - int i,j; - - int *num_bond = atom->num_bond; - tagint **bond_atom = atom->bond_atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,num_bond[i]); - - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - // nsend = # of my datums to send - // include nlocal datums with owner of each atom - - for (i = 0; i < nlocal; i++) { - nspecial[i][0] = num_bond[i]; - for (j = 0; j < num_bond[i]; j++) - onetwo[i][j] = bond_atom[i][j]; - } -} - -/* ---------------------------------------------------------------------- - onethree build - uses rendezvous comm -------------------------------------------------------------------------- */ - -void Special::onethree_build() -{ - int i,j,k,m,proc; - - tagint *tag = atom->tag; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - // nsend = # of my datums to send - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][0]; j++) { - m = atom->map(onetwo[i][j]); - if (m < 0 || m >= nlocal) nsend += nspecial[i][0]-1; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // datums = pairs of onetwo partners where either is unknown - // these pairs are onethree neighbors - // datum = onetwo ID, onetwo ID - // owning proc for each datum = onetwo ID % nprocs - - nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][0]; j++) { - m = atom->map(onetwo[i][j]); - if (m >= 0 && m < nlocal) continue; - proc = onetwo[i][j] % nprocs; - for (k = 0; k < nspecial[i][0]; k++) { - if (j == k) continue; - proclist[nsend] = proc; - inbuf[nsend].atomID = onetwo[i][j]; - inbuf[nsend].partnerID = onetwo[i][k]; - nsend++; - } - } - } - - // perform rendezvous operation - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), - 0,proclist, - rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); - PairRvous *outbuf = (PairRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[1] and onethree for all owned atoms - // based on owned info plus rendezvous output info - // output datums = pairs of atoms that are 1-3 neighbors - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][0]; j++) { - m = atom->map(onetwo[i][j]); - if (m >= 0 && m < nlocal) nspecial[m][1] += nspecial[i][0]-1; - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - nspecial[i][1]++; - } - - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,nspecial[i][1]); - - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onethree,nlocal,maxall,"special:onethree"); - - for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][0]; j++) { - m = atom->map(onetwo[i][j]); - if (m < 0 || m >= nlocal) continue; - for (k = 0; k < nspecial[i][0]; k++) { - if (j == k) continue; - onethree[m][nspecial[m][1]++] = onetwo[i][k]; - } - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; - } - - memory->sfree(outbuf); -} - -/* ---------------------------------------------------------------------- - onefour build - uses rendezvous comm -------------------------------------------------------------------------- */ - -void Special::onefour_build() -{ - int i,j,k,m,proc; - - tagint *tag = atom->tag; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - // nsend = # of my datums to send - - int nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][1]; j++) { - m = atom->map(onethree[i][j]); - if (m < 0 || m >= nlocal) nsend += nspecial[i][0]; - } - } - - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // datums = pairs of onethree and onetwo partners where onethree is unknown - // these pairs are onefour neighbors - // datum = onetwo ID, onetwo ID - // owning proc for each datum = onethree ID % nprocs - - nsend = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][1]; j++) { - m = atom->map(onethree[i][j]); - if (m >= 0 && m < nlocal) continue; - proc = onethree[i][j] % nprocs; - for (k = 0; k < nspecial[i][0]; k++) { - proclist[nsend] = proc; - inbuf[nsend].atomID = onethree[i][j]; - inbuf[nsend].partnerID = onetwo[i][k]; - nsend++; - } - } - } - - // perform rendezvous operation - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), - 0,proclist, - rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); - PairRvous *outbuf = (PairRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[2] and onefour for all owned atoms - // based on owned info plus rendezvous output info - // output datums = pairs of atoms that are 1-4 neighbors - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][1]; j++) { - m = atom->map(onethree[i][j]); - if (m >= 0 && m < nlocal) nspecial[m][2] += nspecial[i][0]; - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - nspecial[i][2]++; - } - - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,nspecial[i][2]); - - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onefour,nlocal,maxall,"special:onefour"); - - for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][1]; j++) { - m = atom->map(onethree[i][j]); - if (m < 0 || m >= nlocal) continue; - for (k = 0; k < nspecial[i][0]; k++) { - onefour[m][nspecial[m][2]++] = onetwo[i][k]; - } - } - } - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; - } - - memory->sfree(outbuf); } /* ---------------------------------------------------------------------- @@ -777,24 +659,21 @@ void Special::combine() trim list of 1-3 neighbors by checking defined angles delete a 1-3 neigh if they are not end atoms of a defined angle and if they are not 1,3 or 2,4 atoms of a defined dihedral - uses rendezvous comm ------------------------------------------------------------------------- */ void Special::angle_trim() { - int i,j,k,m;; + int i,j,m,n; int *num_angle = atom->num_angle; int *num_dihedral = atom->num_dihedral; tagint **angle_atom1 = atom->angle_atom1; - tagint **angle_atom2 = atom->angle_atom2; tagint **angle_atom3 = atom->angle_atom3; tagint **dihedral_atom1 = atom->dihedral_atom1; tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom3 = atom->dihedral_atom3; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; - tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-3 neighbor counts @@ -813,206 +692,73 @@ void Special::angle_trim() " %g = # of 1-3 neighbors before angle trim\n",allcount); } - // if angles or dihedrals are defined - // rendezvous angle 1-3 and dihedral 1-3,2-4 pairs + // if angles or dihedrals are defined, + // flag each 1-3 neigh if it appears in an angle or dihedral if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) { - // nsend = # of my datums to send - // latter is only for angles or dihedrlas where I own atom2 (newton bond off) + // dflag = flag for 1-3 neighs of all owned atoms + + int maxcount = 0; + for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]); + memory->create(dflag,nlocal,maxcount,"special::dflag"); - int nsend = 0; for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_angle[i]; j++) { - if (tag[i] != angle_atom2[i][j]) continue; - m = atom->map(angle_atom1[i][j]); - if (m < 0 || m >= nlocal) nsend++; - m = atom->map(angle_atom3[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } - for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - m = atom->map(dihedral_atom1[i][j]); - if (m < 0 || m >= nlocal) nsend++; - m = atom->map(dihedral_atom3[i][j]); - if (m < 0 || m >= nlocal) nsend++; - m = atom->map(dihedral_atom4[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } + n = nspecial[i][1]; + for (j = 0; j < n; j++) dflag[i][j] = 0; } - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = list of 1,3 atoms in each angle stored by atom + // and list of 1,3 and 2,4 atoms in each dihedral stored by atom - // setup input buf to rendezvous comm - // datums = pairs of onetwo partners where either is unknown - // these pairs are onethree neighbors - // datum = onetwo ID, onetwo ID - // owning proc for each datum = onetwo ID % nprocs - - nsend = 0; + int nbuf = 0; for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_angle[i]; j++) { - if (tag[i] != angle_atom2[i][j]) continue; - - m = atom->map(angle_atom1[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = angle_atom1[i][j] % nprocs; - inbuf[nsend].atomID = angle_atom1[i][j]; - inbuf[nsend].partnerID = angle_atom3[i][j]; - nsend++; - } - - m = atom->map(angle_atom3[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = angle_atom3[i][j] % nprocs; - inbuf[nsend].atomID = angle_atom3[i][j]; - inbuf[nsend].partnerID = angle_atom1[i][j]; - nsend++; - } - } - - for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - - m = atom->map(dihedral_atom1[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = dihedral_atom1[i][j] % nprocs; - inbuf[nsend].atomID = dihedral_atom1[i][j]; - inbuf[nsend].partnerID = dihedral_atom3[i][j]; - nsend++; - } - - m = atom->map(dihedral_atom3[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = dihedral_atom3[i][j] % nprocs; - inbuf[nsend].atomID = dihedral_atom3[i][j]; - inbuf[nsend].partnerID = dihedral_atom1[i][j]; - nsend++; - } - - m = atom->map(dihedral_atom4[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = dihedral_atom4[i][j] % nprocs; - inbuf[nsend].atomID = dihedral_atom4[i][j]; - inbuf[nsend].partnerID = dihedral_atom2[i][j]; - nsend++; - } - } + if (num_angle && atom->nangles) nbuf += 2*num_angle[i]; + if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i]; } + int *buf; + memory->create(buf,nbuf,"special:buf"); - // perform rendezvous operation - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), - 0,proclist, - rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); - PairRvous *outbuf = (PairRvous *) buf; + // fill buffer with list of 1,3 atoms in each angle + // and with list of 1,3 and 2,4 atoms in each dihedral - memory->destroy(proclist); - memory->sfree(inbuf); + int size = 0; + if (num_angle && atom->nangles) + for (i = 0; i < nlocal; i++) + for (j = 0; j < num_angle[i]; j++) { + buf[size++] = angle_atom1[i][j]; + buf[size++] = angle_atom3[i][j]; + } - // flag all onethree atoms to keep + if (num_dihedral && atom->ndihedrals) + for (i = 0; i < nlocal; i++) + for (j = 0; j < num_dihedral[i]; j++) { + buf[size++] = dihedral_atom1[i][j]; + buf[size++] = dihedral_atom3[i][j]; + buf[size++] = dihedral_atom2[i][j]; + buf[size++] = dihedral_atom4[i][j]; + } - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,nspecial[i][1]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - - int **flag; - memory->create(flag,nlocal,maxall,"special:flag"); + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1,3 atoms looking for atoms I own + // when find one, scan its 1-3 neigh list and mark I,J as in an angle - for (i = 0; i < nlocal; i++) + comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL,(void *)this); + + // delete 1-3 neighbors if they are not flagged in dflag + + for (i = 0; i < nlocal; i++) { + m = 0; for (j = 0; j < nspecial[i][1]; j++) - flag[i][j] = 0; - - // reset nspecial[1] and onethree for all owned atoms based on output info - // based on owned info plus rendezvous output info - // output datums = pairs of atoms that are 1-3 neighbors - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_angle[i]; j++) { - if (tag[i] != angle_atom2[i][j]) continue; - - m = atom->map(angle_atom1[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][1]; k++) - if (onethree[m][k] == angle_atom3[i][j]) { - flag[m][k] = 1; - break; - } - } - - m = atom->map(angle_atom3[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][1]; k++) - if (onethree[m][k] == angle_atom1[i][j]) { - flag[m][k] = 1; - break; - } - } - } - - for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - - m = atom->map(dihedral_atom1[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][1]; k++) - if (onethree[m][k] == dihedral_atom3[i][j]) { - flag[m][k] = 1; - break; - } - } - - m = atom->map(dihedral_atom3[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][1]; k++) - if (onethree[m][k] == dihedral_atom1[i][j]) { - flag[m][k] = 1; - break; - } - } - - m = atom->map(dihedral_atom4[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][1]; k++) - if (onethree[m][k] == dihedral_atom2[i][j]) { - flag[m][k] = 1; - break; - } - } - } + if (dflag[i][j]) onethree[i][m++] = onethree[i][j]; + nspecial[i][1] = m; } - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - for (k = 0; k < nspecial[i][1]; k++) - if (onethree[i][k] == outbuf[m].partnerID) { - flag[i][k] = 1; - break; - } - } - - memory->destroy(outbuf); + // clean up - // use flag values to compress onefour list for each atom - - for (i = 0; i < nlocal; i++) { - j = 0; - while (j < nspecial[i][1]) { - if (flag[i][j] == 0) { - onethree[i][j] = onethree[i][nspecial[i][1]-1]; - flag[i][j] = flag[i][nspecial[i][1]-1]; - nspecial[i][1]--; - } else j++; - } - } - - memory->destroy(flag); + memory->destroy(dflag); + memory->destroy(buf); // if no angles or dihedrals are defined, delete all 1-3 neighs @@ -1037,21 +783,18 @@ void Special::angle_trim() } /* ---------------------------------------------------------------------- - trim list of 1-4 neighbors by checking all defined dihedrals + trim list of 1-4 neighbors by checking defined dihedrals delete a 1-4 neigh if they are not end atoms of a defined dihedral - uses rendezvous comm ------------------------------------------------------------------------- */ void Special::dihedral_trim() { - int i,j,k,m; + int i,j,m,n; int *num_dihedral = atom->num_dihedral; tagint **dihedral_atom1 = atom->dihedral_atom1; - tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; - tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-4 neighbor counts @@ -1070,134 +813,58 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, rendezvous the dihedral 1-4 pairs + // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral if (num_dihedral && atom->ndihedrals) { - // nsend = # of my datums to send + // dflag = flag for 1-4 neighs of all owned atoms + + int maxcount = 0; + for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]); + memory->create(dflag,nlocal,maxcount,"special::dflag"); - int nsend = 0; for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - m = atom->map(dihedral_atom1[i][j]); - if (m < 0 || m >= nlocal) nsend++; - m = atom->map(dihedral_atom4[i][j]); - if (m < 0 || m >= nlocal) nsend++; - } + n = nspecial[i][2]; + for (j = 0; j < n; j++) dflag[i][j] = 0; } - int *proclist; - memory->create(proclist,nsend,"special:proclist"); - PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // datums = pairs of onefour atom IDs in a dihedral defined for my atoms - // only dihedrals where I own atom2 (in case newton_bond off) - // datum = atom1 ID and atom4 ID - // send the datum twice, to owner of atom1 ID and atom4 ID - // owning procs for each datum = atom1 or atom4 ID % nprocs + // nbufmax = largest buffer needed to hold info from any proc + // info for each atom = list of 1,4 atoms in each dihedral stored by atom - nsend = 0; - for (i = 0; i < nlocal; i++) { + int nbuf = 0; + for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i]; + int *buf; + memory->create(buf,nbuf,"special:buf"); + + // fill buffer with list of 1,4 atoms in each dihedral + + int size = 0; + for (i = 0; i < nlocal; i++) for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - - m = atom->map(dihedral_atom1[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = dihedral_atom1[i][j] % nprocs; - inbuf[nsend].atomID = dihedral_atom1[i][j]; - inbuf[nsend].partnerID = dihedral_atom4[i][j]; - nsend++; - } - - m = atom->map(dihedral_atom4[i][j]); - if (m < 0 || m >= nlocal) { - proclist[nsend] = dihedral_atom4[i][j] % nprocs; - inbuf[nsend].atomID = dihedral_atom4[i][j]; - inbuf[nsend].partnerID = dihedral_atom1[i][j]; - nsend++; - } + buf[size++] = dihedral_atom1[i][j]; + buf[size++] = dihedral_atom4[i][j]; } - } - // perform rendezvous operation - - char *buf; - int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), - 0,proclist, - rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); - PairRvous *outbuf = (PairRvous *) buf; + // cycle buffer around ring of procs back to self + // when receive buffer, scan list of 1,4 atoms looking for atoms I own + // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral - memory->destroy(proclist); - memory->sfree(inbuf); + comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL,(void *)this); - // flag all of my onefour IDs to keep + // delete 1-4 neighbors if they are not flagged in dflag - int max = 0; - for (i = 0; i < nlocal; i++) - max = MAX(max,nspecial[i][2]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); - - int **flag; - memory->create(flag,nlocal,maxall,"special:flag"); - - for (i = 0; i < nlocal; i++) + for (i = 0; i < nlocal; i++) { + m = 0; for (j = 0; j < nspecial[i][2]; j++) - flag[i][j] = 0; - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < num_dihedral[i]; j++) { - if (tag[i] != dihedral_atom2[i][j]) continue; - - m = atom->map(dihedral_atom1[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][2]; k++) - if (onefour[m][k] == dihedral_atom4[i][j]) { - flag[m][k] = 1; - break; - } - } - - m = atom->map(dihedral_atom4[i][j]); - if (m >= 0 && m < nlocal) { - for (k = 0; k < nspecial[m][2]; k++) - if (onefour[m][k] == dihedral_atom1[i][j]) { - flag[m][k] = 1; - break; - } - } - } + if (dflag[i][j]) onefour[i][m++] = onefour[i][j]; + nspecial[i][2] = m; } - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - for (k = 0; k < nspecial[i][2]; k++) - if (onefour[i][k] == outbuf[m].partnerID) { - flag[i][k] = 1; - break; - } - } + // clean up - memory->destroy(outbuf); + memory->destroy(dflag); + memory->destroy(buf); - // use flag values to compress onefour list for each atom - - for (i = 0; i < nlocal; i++) { - j = 0; - while (j < nspecial[i][2]) { - if (flag[i][j] == 0) { - onefour[i][j] = onefour[i][nspecial[i][2]-1]; - flag[i][j] = flag[i][nspecial[i][2]-1]; - nspecial[i][2]--; - } else j++; - } - } - - memory->destroy(flag); - // if no dihedrals are defined, delete all 1-4 neighs } else { @@ -1221,95 +888,262 @@ void Special::dihedral_trim() } /* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N IDRvous datums - no outbuf + when receive buffer, scan tags for atoms I own + when find one, increment nspecial count for that atom ------------------------------------------------------------------------- */ -int Special::rendezvous_ids(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) -{ - Special *sptr = (Special *) ptr; - Memory *memory = sptr->memory; - - int *procowner; - tagint *atomIDs; - - memory->create(procowner,n,"special:procowner"); - memory->create(atomIDs,n,"special:atomIDs"); - - IDRvous *in = (IDRvous *) inbuf; - - for (int i = 0; i < n; i++) { - procowner[i] = in[i].me; - atomIDs[i] = in[i].atomID; - } - - // store rendezvous data in Special class - - sptr->nrvous = n; - sptr->procowner = procowner; - sptr->atomIDs = atomIDs; - - // flag = 0: no second comm needed in rendezvous - - flag = 0; - return 0; -} - - -/* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N PairRvous datums - outbuf = same list of N PairRvous datums, routed to different procs -------------------------------------------------------------------------- */ - -int Special::rendezvous_pairs(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) +void Special::ring_one(int ndatum, char *cbuf, void *ptr) { Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; - Memory *memory = sptr->memory; - - // clear atom map so it can be used here as a hash table - // faster than an STL map for large atom counts - - atom->map_clear(); - - // hash atom IDs stored in rendezvous decomposition - - int nrvous = sptr->nrvous; - tagint *atomIDs = sptr->atomIDs; - - for (int i = 0; i < nrvous; i++) - atom->map_one(atomIDs[i],i); - - // proclist = owner of atomID in caller decomposition - - PairRvous *in = (PairRvous *) inbuf; - int *procowner = sptr->procowner; - memory->create(proclist,n,"special:proclist"); + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + tagint *buf = (tagint *) cbuf; int m; - for (int i = 0; i < n; i++) { - m = atom->map(in[i].atomID); - proclist[i] = procowner[m]; + + for (int i = 0; i < ndatum; i++) { + m = atom->map(buf[i]); + if (m >= 0 && m < nlocal) nspecial[m][0]++; } +} - outbuf = inbuf; - - // re-create atom map +/* ---------------------------------------------------------------------- + when receive buffer, scan 2nd-atom tags for atoms I own + when find one, add 1st-atom tag to onetwo list for 2nd atom +------------------------------------------------------------------------- */ - atom->map_init(0); - atom->nghost = 0; - atom->map_set(); +void Special::ring_two(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int nlocal = atom->nlocal; - // flag = 1: outbuf = inbuf - - flag = 1; - return n; + tagint **onetwo = sptr->onetwo; + int *count = sptr->count; + + tagint *buf = (tagint *) cbuf; + int m; + + for (int i = 1; i < ndatum; i += 2) { + m = atom->map(buf[i]); + if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1]; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1-2 neighbors for atoms I own + when find one, increment 1-3 count by # of 1-2 neighbors of my atom, + subtracting one since my list will contain original atom +------------------------------------------------------------------------- */ + +void Special::ring_three(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint *buf = (tagint *) cbuf; + int i,j,m,n,num12; + + i = 0; + while (i < ndatum) { + n = buf[i]; + num12 = buf[i+1]; + for (j = 0; j < num12; j++) { + m = atom->map(buf[i+2+j]); + if (m >= 0 && m < nlocal) + n += nspecial[m][0] - 1; + } + buf[i] = n; + i += 2 + num12; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1-2 neighbors for atoms I own + when find one, add its neighbors to 1-3 list + increment the count in buf(i+4) + exclude the atom whose tag = original + this process may include duplicates but they will be culled later +------------------------------------------------------------------------- */ + +void Special::ring_four(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint **onetwo = sptr->onetwo; + + tagint *buf = (tagint *) cbuf; + tagint original; + int i,j,k,m,n,num12,num13; + + i = 0; + while (i < ndatum) { + original = buf[i]; + num12 = buf[i+1]; + num13 = buf[i+2]; + n = buf[i+3]; + for (j = 0; j < num12; j++) { + m = atom->map(buf[i+4+j]); + if (m >= 0 && m < nlocal) + for (k = 0; k < nspecial[m][0]; k++) + if (onetwo[m][k] != original) + buf[i+4+num12+(n++)] = onetwo[m][k]; + } + buf[i+3] = n; + i += 4 + num12 + num13; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1-3 neighbors for atoms I own + when find one, increment 1-4 count by # of 1-2 neighbors of my atom + may include duplicates and original atom but they will be culled later +------------------------------------------------------------------------- */ + +void Special::ring_five(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint *buf = (tagint *) cbuf; + int i,j,m,n,num13; + + i = 0; + while (i < ndatum) { + n = buf[i]; + num13 = buf[i+1]; + for (j = 0; j < num13; j++) { + m = atom->map(buf[i+2+j]); + if (m >= 0 && m < nlocal) n += nspecial[m][0]; + } + buf[i] = n; + i += 2 + num13; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1-3 neighbors for atoms I own + when find one, add its neighbors to 1-4 list + incrementing the count in buf(i+4) + this process may include duplicates but they will be culled later +------------------------------------------------------------------------- */ + +void Special::ring_six(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint **onetwo = sptr->onetwo; + + tagint *buf = (tagint *) cbuf; + int i,j,k,m,n,num13,num14; + + i = 0; + while (i < ndatum) { + num13 = buf[i]; + num14 = buf[i+1]; + n = buf[i+2]; + for (j = 0; j < num13; j++) { + m = atom->map(buf[i+3+j]); + if (m >= 0 && m < nlocal) + for (k = 0; k < nspecial[m][0]; k++) + buf[i+3+num13+(n++)] = onetwo[m][k]; + } + buf[i+2] = n; + i += 3 + num13 + num14; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1,3 atoms looking for atoms I own + when find one, scan its 1-3 neigh list and mark I,J as in an angle +------------------------------------------------------------------------- */ + +void Special::ring_seven(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint **onethree = sptr->onethree; + int **dflag = sptr->dflag; + + tagint *buf = (tagint *) cbuf; + tagint iglobal,jglobal; + int i,m,ilocal,jlocal; + + i = 0; + while (i < ndatum) { + iglobal = buf[i]; + jglobal = buf[i+1]; + ilocal = atom->map(iglobal); + jlocal = atom->map(jglobal); + if (ilocal >= 0 && ilocal < nlocal) + for (m = 0; m < nspecial[ilocal][1]; m++) + if (jglobal == onethree[ilocal][m]) { + dflag[ilocal][m] = 1; + break; + } + if (jlocal >= 0 && jlocal < nlocal) + for (m = 0; m < nspecial[jlocal][1]; m++) + if (iglobal == onethree[jlocal][m]) { + dflag[jlocal][m] = 1; + break; + } + i += 2; + } +} + +/* ---------------------------------------------------------------------- + when receive buffer, scan list of 1,4 atoms looking for atoms I own + when find one, scan its 1-4 neigh list and mark I,J as in a dihedral +------------------------------------------------------------------------- */ + +void Special::ring_eight(int ndatum, char *cbuf, void *ptr) +{ + Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + tagint **onefour = sptr->onefour; + int **dflag = sptr->dflag; + + tagint *buf = (tagint *) cbuf; + tagint iglobal,jglobal; + int i,m,ilocal,jlocal; + + i = 0; + while (i < ndatum) { + iglobal = buf[i]; + jglobal = buf[i+1]; + ilocal = atom->map(iglobal); + jlocal = atom->map(jglobal); + if (ilocal >= 0 && ilocal < nlocal) + for (m = 0; m < nspecial[ilocal][2]; m++) + if (jglobal == onefour[ilocal][m]) { + dflag[ilocal][m] = 1; + break; + } + if (jlocal >= 0 && jlocal < nlocal) + for (m = 0; m < nspecial[jlocal][2]; m++) + if (iglobal == onefour[jlocal][m]) { + dflag[jlocal][m] = 1; + break; + } + i += 2; + } } /* ---------------------------------------------------------------------- @@ -1325,15 +1159,3 @@ void Special::fix_alteration() modify->fix[ifix]->rebuild_special(); } -/* ---------------------------------------------------------------------- - print timing output -------------------------------------------------------------------------- */ - -void Special::timer_output(double time1) -{ - double time2 = MPI_Wtime(); - if (comm->me == 0) { - if (screen) fprintf(screen," special bonds CPU = %g secs\n",time2-time1); - if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",time2-time1); - } -} diff --git a/src/special.h b/src/special.h index d02a8522f6..9f25200336 100644 --- a/src/special.h +++ b/src/special.h @@ -26,43 +26,29 @@ class Special : protected Pointers { private: int me,nprocs; - int maxall; tagint **onetwo,**onethree,**onefour; - // data used by rendezvous callback methods + // data used by ring callback methods - int nrvous; - tagint *atomIDs; - int *procowner; - - struct IDRvous { - int me; - tagint atomID; - }; - - struct PairRvous { - tagint atomID,partnerID; - }; - - // private methods - - void atom_owners(); - void onetwo_build_newton(); - void onetwo_build_newton_off(); - void onethree_build(); - void onefour_build(); + int *count; + int **dflag; void dedup(); void angle_trim(); void dihedral_trim(); void combine(); void fix_alteration(); - void timer_output(double); - // callback functions for rendezvous communication + // callback functions for ring communication - static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); - static int rendezvous_pairs(int, char *, int &, int *&, char *&, void *); + static void ring_one(int, char *, void *); + static void ring_two(int, char *, void *); + static void ring_three(int, char *, void *); + static void ring_four(int, char *, void *); + static void ring_five(int, char *, void *); + static void ring_six(int, char *, void *); + static void ring_seven(int, char *, void *); + static void ring_eight(int, char *, void *); }; }