From ab1c3f649832aa37f770dd9c1775050ce4075b7a Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 30 Nov 2018 11:54:48 -0700 Subject: [PATCH 01/12] rendevous comm option for special bonds and fix rigid/small --- src/RIGID/fix_rigid_small.cpp | 491 +++++++-------- src/RIGID/fix_rigid_small.h | 26 +- src/comm.cpp | 51 ++ src/comm.h | 4 + src/create_atoms.cpp | 37 +- src/hashlittle.cpp | 333 ++++++++++ src/hashlittle.h | 5 + src/irregular.cpp | 17 +- src/read_data.cpp | 15 + src/read_restart.cpp | 15 + src/replicate.cpp | 8 +- src/special.cpp | 1095 ++++++++++++++++----------------- src/special.h | 27 +- 13 files changed, 1249 insertions(+), 875 deletions(-) create mode 100644 src/hashlittle.cpp create mode 100644 src/hashlittle.h diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 44e1870e0a..4421d9ae17 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -34,6 +34,7 @@ #include "variable.h" #include "random_mars.h" #include "math_const.h" +#include "hashlittle.h" #include "memory.h" #include "error.h" @@ -70,8 +71,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), - id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), - idclose(NULL), rsqclose(NULL) + id_dilate(NULL), onemols(NULL) { int i; @@ -107,18 +107,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // parse args for rigid body specification int *mask = atom->mask; - tagint *bodyid = NULL; + tagint *bodyID = NULL; int nlocal = atom->nlocal; if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command"); if (strcmp(arg[3],"molecule") == 0) { if (atom->molecule_flag == 0) error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); - bodyid = atom->molecule; + bodyID = atom->molecule; } else if (strcmp(arg[3],"custom") == 0) { if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command"); - bodyid = new tagint[nlocal]; + bodyID = new tagint[nlocal]; customflag = 1; // determine whether atom-style variable or atom property is used. @@ -126,9 +126,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : int is_double=0; int custom_index = atom->find_custom(arg[4]+2,is_double); if (custom_index == -1) - error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "previously defined property/atom"); else if (is_double) - error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "integer-valued property/atom"); int minval = INT_MAX; int *value = atom->ivector[custom_index]; @@ -139,15 +141,17 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)(value[i] - minval + 1); - else bodyid[i] = 0; + bodyID[i] = (tagint)(value[i] - minval + 1); + else bodyID[i] = 0; } else if (strstr(arg[4],"v_") == arg[4]) { int ivariable = input->variable->find(arg[4]+2); if (ivariable < 0) - error->all(FLERR,"Variable name for fix rigid/small custom does not exist"); + error->all(FLERR,"Variable name for fix rigid/small custom " + "does not exist"); if (input->variable->atomstyle(ivariable) == 0) - error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable"); + error->all(FLERR,"Fix rigid/small custom variable is not " + "atom-style variable"); double *value = new double[nlocal]; input->variable->compute_atom(ivariable,0,value,1,0); int minval = INT_MAX; @@ -158,8 +162,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)((tagint)value[i] - minval + 1); - else bodyid[0] = 0; + bodyID[i] = (tagint)((tagint)value[i] - minval + 1); + else bodyID[0] = 0; delete[] value; } else error->all(FLERR,"Unsupported fix rigid custom property"); } else error->all(FLERR,"Illegal fix rigid/small command"); @@ -167,10 +171,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (atom->map_style == 0) error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify"); - // maxmol = largest bodyid # + // maxmol = largest bodyID # + maxmol = -1; for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]); + if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]); tagint itmp; MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world); @@ -400,8 +405,19 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // sets bodytag for owned atoms // body attributes are computed later by setup_bodies() - create_bodies(bodyid); - if (customflag) delete [] bodyid; + double time1 = MPI_Wtime(); + + create_bodies(bodyID); + if (customflag) delete [] bodyID; + + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," create_bodies CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," create_bodies CPU = %g secs\n",time2-time1); + } // set nlocal_body and allocate bodies I own @@ -1514,175 +1530,71 @@ void FixRigidSmall::set_v() set bodytag for all owned atoms ------------------------------------------------------------------------- */ -void FixRigidSmall::create_bodies(tagint *bodyid) +void FixRigidSmall::create_bodies(tagint *bodyID) { - int i,m,n; - double unwrap[3]; + int i,m; - // error check on image flags of atoms in rigid bodies - - imageint *image = atom->image; + // allocate buffer for input to rendezvous comm + // ncount = # of my atoms in bodies + int *mask = atom->mask; int nlocal = atom->nlocal; - int *periodicity = domain->periodicity; - int xbox,ybox,zbox; - - int flag = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - xbox = (image[i] & IMGMASK) - IMGMAX; - ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; - zbox = (image[i] >> IMG2BITS) - IMGMAX; - if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) || - (zbox && !periodicity[2])) flag = 1; - } - - int flagall; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag " - "in a non-periodic dimension"); - - // allocate buffer for passing messages around ring of procs - // percount = max number of values to put in buffer for each of ncount - int ncount = 0; for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) ncount++; - int percount = 5; - double *buf; - memory->create(buf,ncount*percount,"rigid/small:buf"); + int *proclist; + memory->create(proclist,ncount,"rigid/small:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf"); - // create map hash for storing unique body IDs of my atoms - // key = body ID - // value = index into per-body data structure - // n = # of entries in hash - - hash = new std::map(); - hash->clear(); - - // setup hash - // key = body ID - // value = index into N-length data structure - // n = count of unique bodies my atoms are part of - - n = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++; - } - - // bbox = bounding box of each rigid body my atoms are part of - - memory->create(bbox,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG; - bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG; - } - - // pack my atoms into buffer as body ID, unwrapped coords + // setup buf to pass to rendezvous comm + // one BodyMsg datum for each constituent atom + // datum = me, local index of atom, atomID, bodyID, unwrapped coords + // owning proc for each datum = random hash of bodyID double **x = atom->x; - - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } - - // pass buffer around ring of procs - // func = update bbox with atom coords from every proc - // when done, have full bbox for every rigid body my atoms are part of - - comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this); - - // check if any bbox is size 0.0, meaning rigid body is a single particle - - flag = 0; - for (i = 0; i < n; i++) - if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] && - bbox[i][4] == bbox[i][5]) flag = 1; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) - error->all(FLERR,"One or more rigid bodies are a single particle"); - - // ctr = center pt of each rigid body my atoms are part of - - memory->create(ctr,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]); - ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]); - ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]); - } - - // idclose = ID of atom in body closest to center pt (smaller ID if tied) - // rsqclose = distance squared from idclose to center pt - - memory->create(idclose,n,"rigid/small:idclose"); - memory->create(rsqclose,n,"rigid/small:rsqclose"); - - for (i = 0; i < n; i++) rsqclose[i] = BIG; - - // pack my atoms into buffer as body ID, atom ID, unwrapped coords - tagint *tag = atom->tag; + imageint *image = atom->image; m = 0; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = ubuf(tag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; + proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].ilocal = i; + inbuf[m].atomID = tag[i]; + inbuf[m].bodyID = bodyID[i]; + domain->unmap(x[i],image[i],inbuf[m].x); + m++; } - // pass buffer around ring of procs - // func = update idclose,rsqclose with atom IDs from every proc - // when done, have idclose for every rigid body my atoms are part of + // perform rendezvous operation + // each proc owns random subset of bodies, receives all atoms in the bodies + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body - comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this); + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_body,buf,sizeof(OutRvous), + (void *) this); + OutRvous *outbuf = (OutRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); - // set bodytag of all owned atoms, based on idclose - // find max value of rsqclose across all procs + // set bodytag of all owned atoms based on outbuf info for constituent atoms - double rsqmax = 0.0; - for (i = 0; i < nlocal; i++) { - bodytag[i] = 0; - if (!(mask[i] & groupbit)) continue; - m = hash->find(bodyid[i])->second; - bodytag[i] = idclose[m]; - rsqmax = MAX(rsqmax,rsqclose[m]); - } + for (i = 0; i < nlocal; i++) + if (!(mask[i] & groupbit)) bodytag[i] = 0; - // pack my atoms into buffer as bodytag of owning atom, unwrapped coords + for (m = 0; m < nreturn; m++) + bodytag[outbuf[m].ilocal] = outbuf[m].atomID; - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = ubuf(bodytag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } + memory->sfree(outbuf); - // pass buffer around ring of procs - // func = update rsqfar for atoms belonging to bodies I own - // when done, have rsqfar for all atoms in bodies I own - - rsqfar = 0.0; - comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this); - - // find maxextent of rsqfar across all procs + // maxextent = max of rsqfar across all procs // if defined, include molecule->maxextent MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world); @@ -1691,125 +1603,151 @@ void FixRigidSmall::create_bodies(tagint *bodyid) for (int i = 0; i < nmol; i++) maxextent = MAX(maxextent,onemols[i]->maxextent); } +} + +/* ---------------------------------------------------------------------- + process rigid bodies assigned to me + buf = list of N BodyMsg datums +------------------------------------------------------------------------- */ + +int FixRigidSmall::rendezvous_body(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) +{ + int i,j,m; + double delx,dely,delz,rsq; + int *iclose; + tagint *idclose; + double *x,*xown,*rsqclose; + double **bbox,**ctr; + + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + Memory *memory = frsptr->memory; + Error *error = frsptr->error; + MPI_Comm world = frsptr->world; + + // setup hash + // ncount = number of bodies assigned to me + // key = body ID + // value = index into Ncount-length data structure + + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) { + id = in[i].bodyID; + if (hash.find(id) == hash.end()) hash[id] = ncount++; + } + + // bbox = bounding box of each rigid body + + memory->create(bbox,ncount,6,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG; + bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG; + } + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + bbox[m][0] = MIN(bbox[m][0],x[0]); + bbox[m][1] = MAX(bbox[m][1],x[0]); + bbox[m][2] = MIN(bbox[m][2],x[1]); + bbox[m][3] = MAX(bbox[m][3],x[1]); + bbox[m][4] = MIN(bbox[m][4],x[2]); + bbox[m][5] = MAX(bbox[m][5],x[2]); + } + + // check if any bbox is size 0.0, meaning rigid body is a single particle + + int flag = 0; + for (m = 0; m < ncount; m++) + if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] && + bbox[m][4] == bbox[m][5]) flag = 1; + int flagall; + MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here? + if (flagall) + error->all(FLERR,"One or more rigid bodies are a single particle"); + + // ctr = geometric center pt of each rigid body + + memory->create(ctr,ncount,3,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]); + ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]); + ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]); + } + + // idclose = atomID closest to center point of each body + + memory->create(idclose,ncount,"rigid/small:idclose"); + memory->create(iclose,ncount,"rigid/small:iclose"); + memory->create(rsqclose,ncount,"rigid/small:rsqclose"); + for (m = 0; m < ncount; m++) rsqclose[m] = BIG; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + delx = x[0] - ctr[m][0]; + dely = x[1] - ctr[m][1]; + delz = x[2] - ctr[m][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= rsqclose[m]) { + if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue; + iclose[m] = i; + idclose[m] = in[i].atomID; + rsqclose[m] = rsq; + } + } + + // compute rsqfar for all bodies I own + // set rsqfar back in caller + + double rsqfar = 0.0; + + for (int i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + xown = in[iclose[m]].x; + x = in[i].x; + delx = x[0] - xown[0]; + dely = x[1] - xown[1]; + delz = x[2] - xown[2]; + rsq = delx*delx + dely*dely + delz*delz; + rsqfar = MAX(rsqfar,rsq); + } + + frsptr->rsqfar = rsqfar; + + // pass list of OutRvous datums back to comm->rendezvous + + int nout = n; + memory->create(proclist,nout,"rigid/small:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out"); + + for (int i = 0; i < nout; i++) { + proclist[i] = in[i].me; + out[i].ilocal = in[i].ilocal; + m = hash.find(in[i].bodyID)->second; + out[i].atomID = idclose[m]; + } + + outbuf = (char *) out; // clean up + // Comm::rendezvous will delete proclist and out (outbuf) - delete hash; - memory->destroy(buf); memory->destroy(bbox); memory->destroy(ctr); memory->destroy(idclose); + memory->destroy(iclose); memory->destroy(rsqclose); -} -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update bounding box for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **bbox = frsptr->bbox; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int j,imol; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - x = &buf[m+1]; - bbox[j][0] = MIN(bbox[j][0],x[0]); - bbox[j][1] = MAX(bbox[j][1],x[0]); - bbox[j][2] = MIN(bbox[j][2],x[1]); - bbox[j][3] = MAX(bbox[j][3],x[1]); - bbox[j][4] = MIN(bbox[j][4],x[2]); - bbox[j][5] = MAX(bbox[j][5],x[2]); - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update nearest atom to body center for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **ctr = frsptr->ctr; - tagint *idclose = frsptr->idclose; - double *rsqclose = frsptr->rsqclose; - - double *buf = (double *) cbuf; - int ndatums = n/5; - - int j,imol; - tagint tag; - double delx,dely,delz,rsq; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 5) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - tag = (tagint) ubuf(buf[m+1]).i; - x = &buf[m+2]; - delx = x[0] - ctr[j][0]; - dely = x[1] - ctr[j][1]; - delz = x[2] - ctr[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= rsqclose[j]) { - if (rsq == rsqclose[j] && tag > idclose[j]) continue; - idclose[j] = tag; - rsqclose[j] = rsq; - } - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update rsqfar = distance from owning atom to other atom -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - double **x = frsptr->atom->x; - imageint *image = frsptr->atom->image; - int nlocal = frsptr->atom->nlocal; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int iowner; - tagint tag; - double delx,dely,delz,rsq; - double *xx; - double unwrap[3]; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - tag = (tagint) ubuf(buf[m]).i; - iowner = frsptr->atom->map(tag); - if (iowner < 0 || iowner >= nlocal) continue; - frsptr->domain->unmap(x[iowner],image[iowner],unwrap); - xx = &buf[m+1]; - delx = xx[0] - unwrap[0]; - dely = xx[1] - unwrap[1]; - delz = xx[2] - unwrap[2]; - rsq = delx*delx + dely*dely + delz*delz; - frsptr->rsqfar = MAX(frsptr->rsqfar,rsq); - } + return nout; } /* ---------------------------------------------------------------------- @@ -2472,9 +2410,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) int nlocal = atom->nlocal; - hash = new std::map(); + std::map hash; for (i = 0; i < nlocal; i++) - if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i]; + if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i]; // open file and read header @@ -2533,11 +2471,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) id = ATOTAGINT(values[0]); if (id <= 0 || id > maxmol) error->all(FLERR,"Invalid rigid body ID in fix rigid/small file"); - if (hash->find(id) == hash->end()) { + if (hash.find(id) == hash.end()) { buf = next + 1; continue; } - m = (*hash)[id]; + m = hash[id]; inbody[m] = 1; if (which == 0) { @@ -2576,7 +2514,6 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) delete [] buffer; delete [] values; - delete hash; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 3f6826f9bb..a820efcdea 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall) #include "fix.h" // replace this later -#include +//#include namespace LAMMPS_NS { @@ -180,13 +180,21 @@ class FixRigidSmall : public Fix { // class data used by ring communication callbacks - std::map *hash; - double **bbox; - double **ctr; - tagint *idclose; - double *rsqclose; double rsqfar; + struct InRvous { + int me,ilocal; + tagint atomID,bodyID; + double x[3]; + }; + + struct OutRvous { + int ilocal; + tagint atomID; + }; + + // local methods + void image_shift(); void set_xv(); void set_v(); @@ -199,11 +207,9 @@ class FixRigidSmall : public Fix { void grow_body(); void reset_atom2body(); - // callback functions for ring communication + // callback function for rendezvous communication - static void ring_bbox(int, char *, void *); - static void ring_nearest(int, char *, void *); - static void ring_farthest(int, char *, void *); + static int rendezvous_body(int, char *, int *&, char *&, void *); // debug diff --git a/src/comm.cpp b/src/comm.cpp index f355a562fc..5fcfa141d0 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -28,6 +28,7 @@ #include "dump.h" #include "group.h" #include "procmap.h" +#include "irregular.h" #include "accelerator_kokkos.h" #include "memory.h" #include "error.h" @@ -725,6 +726,56 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, memory->destroy(bufcopy); } +/* ---------------------------------------------------------------------- + rendezvous communication operation +------------------------------------------------------------------------- */ + +int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, + int (*callback)(int, char *, int *&, char *&, void *), + char *&outbuf, int outsize, void *ptr) +{ + // comm data from caller decomposition to rendezvous decomposition + + Irregular *irregular = new Irregular(lmp); + + int n_rvous = irregular->create_data(n,proclist); // add sort + char *inbuf_rvous = (char *) memory->smalloc((bigint) n_rvous*insize, + "rendezvous:inbuf_rvous"); + irregular->exchange_data(inbuf,insize,inbuf_rvous); + + irregular->destroy_data(); + delete irregular; + + // peform rendezvous computation via callback() + // callback() allocates proclist_rvous and outbuf_rvous + + int *proclist_rvous; + char *outbuf_rvous; + + int nout_rvous = + callback(n_rvous,inbuf_rvous,proclist_rvous,outbuf_rvous,ptr); + + memory->sfree(inbuf_rvous); + + // comm data from rendezvous decomposition back to caller + // caller will free outbuf + + irregular = new Irregular(lmp); + + int nout = irregular->create_data(nout_rvous,proclist_rvous); + outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); + irregular->exchange_data(outbuf_rvous,outsize,outbuf); + + irregular->destroy_data(); + delete irregular; + memory->destroy(proclist_rvous); + memory->sfree(outbuf_rvous); + + // return number of datums + + return nout; +} + /* ---------------------------------------------------------------------- proc 0 reads Nlines from file into buf and bcasts buf to all procs caller allocates buf to max size needed diff --git a/src/comm.h b/src/comm.h index 2579f9b283..8bb057a0c1 100644 --- a/src/comm.h +++ b/src/comm.h @@ -109,6 +109,10 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); + int rendezvous(int, int *, char *, int, + int (*)(int, char *, int *&, char *&, void *), + char *&, int, void *); + int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index 383c60f1cd..ecbf612f70 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -513,9 +513,6 @@ void CreateAtoms::command(int narg, char **arg) if (domain->triclinic) domain->lamda2x(atom->nlocal); } - MPI_Barrier(world); - double time2 = MPI_Wtime(); - // clean up delete ranmol; @@ -525,21 +522,6 @@ void CreateAtoms::command(int narg, char **arg) delete [] ystr; delete [] zstr; - // print status - - if (comm->me == 0) { - if (screen) { - fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(screen," Time spent = %g secs\n",time2-time1); - } - if (logfile) { - fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(logfile," Time spent = %g secs\n",time2-time1); - } - } - // for MOLECULE mode: // create special bond lists for molecular systems, // but not for atom style template @@ -549,6 +531,25 @@ void CreateAtoms::command(int narg, char **arg) if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) { Special special(lmp); special.build(); + + } + } + + // print status + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) { + fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(screen," create_atoms CPU = %g secs\n",time2-time1); + } + if (logfile) { + fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1); } } } diff --git a/src/hashlittle.cpp b/src/hashlittle.cpp new file mode 100644 index 0000000000..be930217a1 --- /dev/null +++ b/src/hashlittle.cpp @@ -0,0 +1,333 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +#include "stddef.h" +#include "stdint.h" + +#define HASH_LITTLE_ENDIAN 1 // Intel and AMD are little endian + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; + +#else /* PURIFY_HATES_HASHLITTLE */ +/* I don't know what it is about Jenkins' hashlittle function, but + * it drives purify insane, even with VALGRIND defined. It makes + * purify unusable!! The code execution doesn't even make sense. + * Below is a (probably) weaker hash function that at least allows + * testing with purify. + */ +#define MAXINT_DIV_PHI 11400714819323198485U + + uint32_t h, rest, *p, bytes, num_bytes; + char *byteptr; + + num_bytes = length; + + /* First hash the uint32_t-sized portions of the key */ + h = 0; + for (p = (uint32_t *)key, bytes=num_bytes; + bytes >= (uint32_t) sizeof(uint32_t); + bytes-=sizeof(uint32_t), p++){ + h = (h^(*p))*MAXINT_DIV_PHI; + } + + /* Then take care of the remaining bytes, if any */ + rest = 0; + for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){ + rest = (rest<<8) | (*byteptr); + } + + /* If extra bytes, merge the two parts */ + if (rest) + h = (h^rest)*MAXINT_DIV_PHI; + + return h; +#endif /* PURIFY_HATES_HASHLITTLE */ +} diff --git a/src/hashlittle.h b/src/hashlittle.h new file mode 100644 index 0000000000..7b57a35c80 --- /dev/null +++ b/src/hashlittle.h @@ -0,0 +1,5 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +uint32_t hashlittle(const void *key, size_t length, uint32_t); diff --git a/src/irregular.cpp b/src/irregular.cpp index 9c15f135d0..60025249cf 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -501,7 +501,8 @@ int compare_standalone(const int i, const int j, void *ptr) void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf) { - int i,m,n,offset,count; + int i,m,n,count; + bigint offset; // post all receives @@ -739,11 +740,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) { - int i,m,n,offset,count; + int i,n,count; + bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB + char *dest; // post all receives, starting after self copies - offset = num_self*nbytes; + bigint offset = num_self*nbytes; for (int irecv = 0; irecv < nrecv_proc; irecv++) { MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR, proc_recv[irecv],0,world,&request[irecv]); @@ -765,18 +768,22 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) n = 0; for (int isend = 0; isend < nsend_proc; isend++) { count = num_send[isend]; + dest = buf; for (i = 0; i < count; i++) { m = index_send[n++]; - memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world); } // copy datums to self, put at beginning of recvbuf + dest = recvbuf; for (i = 0; i < num_self; i++) { m = index_self[i]; - memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } // wait on all incoming messages diff --git a/src/read_data.cpp b/src/read_data.cpp index 373ba30151..d7bd298e7b 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -120,6 +120,9 @@ void ReadData::command(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal read_data command"); + MPI_Barrier(world); + double time1 = MPI_Wtime; + // optional args addflag = NONE; @@ -905,6 +908,18 @@ void ReadData::command(int narg, char **arg) force->kspace = saved_kspace; } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime; + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_atoms CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_atoms CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 82028d8316..a338a843d7 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -81,6 +81,9 @@ void ReadRestart::command(int narg, char **arg) if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); + MPI_Barrier(world); + double time1 = MPI_Wtime; + MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -562,6 +565,18 @@ void ReadRestart::command(int narg, char **arg) Special special(lmp); special.build(); } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime; + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_restart CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_restart CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/replicate.cpp b/src/replicate.cpp index cdadf1fd1f..3c8f4a8aee 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg) if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); - // Record wall time for atom replication + // record wall time for atom replication MPI_Barrier(world); double time1 = MPI_Wtime(); @@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg) special.build(); } - // Wall time + // total time MPI_Barrier(world); double time2 = MPI_Wtime(); if (me == 0) { if (screen) - fprintf(screen," Time spent = %g secs\n",time2-time1); + fprintf(screen," replicate CPU = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," Time spent = %g secs\n",time2-time1); + fprintf(logfile," replicate CPU = %g secs\n",time2-time1); } } diff --git a/src/special.cpp b/src/special.cpp index fccc930353..a18c597765 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -21,9 +21,10 @@ #include "modify.h" #include "fix.h" #include "accelerator_kokkos.h" +#include "hashlittle.h" +#include "atom_masks.h" #include "memory.h" #include "error.h" -#include "atom_masks.h" using namespace LAMMPS_NS; @@ -54,11 +55,12 @@ Special::~Special() void Special::build() { - int i,j,k,size; - int max,maxall,nbuf; - tagint *buf; + int i,j,k,m,n,size,proc; + int max,maxall; + char *buf; MPI_Barrier(world); + double time1 = MPI_Wtime(); int nlocal = atom->nlocal; @@ -87,99 +89,88 @@ void Special::build() // ----------------------------------------------------- // compute nspecial[i][0] = # of 1-2 neighbors of atom i + // create onetwo[i] = list of 1-2 neighbors for atom i // ----------------------------------------------------- - // bond partners stored by atom itself + // ncount = # of my datums to send (newton or newton off) + // include nlocal datums with owner of each atom - for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i]; + int newton_bond = force->newton_bond; - // if newton_bond off, then done - // else only counted 1/2 of all bonds, so count other half + int ncount = 0; + for (i = 0; i < nlocal; i++) ncount += num_bond[i]; + if (newton_bond) ncount *= 2; + ncount += nlocal; - if (force->newton_bond) { + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = global tag of 2nd atom in each bond + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += num_bond[i]; - memory->create(buf,nbuf,"special:buf"); + m = 0; + for (i = 0; i < nlocal; i++) { + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - // fill buffer with global tags of bond partners of my atoms - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - buf[size++] = bond_atom[i][j]; - - // cycle buffer around ring of procs back to self - // when receive buffer, scan tags for atoms I own - // when find one, increment nspecial count for that atom - - comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL,(void *)this); - - memory->destroy(buf); + for (j = 0; j < num_bond[i]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = bond_atom[i][j]; + m++; + } + if (newton_bond) { + for (j = 0; j < num_bond[i]; j++) { + proclist[m] = hashlittle(&bond_atom[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -1; + inbuf[m].atomID = bond_atom[i][j]; + inbuf[m].partnerID = tag[i]; + m++; + } + } } - // ---------------------------------------------------- - // create onetwo[i] = list of 1-2 neighbors for atom i - // ---------------------------------------------------- + // perform rendezvous operation + // each proc owns random subset of atoms, receives all their bond partners - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]); + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[0] and onetwo for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); + + // compute and print max # of 1-2 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-2 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-2 neighbors\n",maxall); } - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - // count = accumulating counter - - memory->create(count,nlocal,"special:count"); - for (i = 0; i < nlocal; i++) count[i] = 0; - - // add bond partners stored by atom to onetwo list - - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - onetwo[i][count[i]++] = bond_atom[i][j]; - - // if newton_bond off, then done - // else only stored 1/2 of all bonds, so store other half - - if (force->newton_bond) { - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 global tags in each bond - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with global tags of both atoms in bond - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) { - buf[size++] = tag[i]; - buf[size++] = bond_atom[i][j]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan 2nd-atom tags for atoms I own - // when find one, add 1st-atom tag to onetwo list for 2nd atom - - comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL,(void *)this); - - memory->destroy(buf); - } - - memory->destroy(count); - // ----------------------------------------------------- // done if special_bond weights for 1-3, 1-4 are set to 1.0 // ----------------------------------------------------- @@ -189,114 +180,83 @@ void Special::build() dedup(); combine(); fix_alteration(); + timer_output(time1); return; } // ----------------------------------------------------- // compute nspecial[i][1] = # of 1-3 neighbors of atom i + // create onethree[i] = list of 1-3 neighbors for atom i // ----------------------------------------------------- - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-2 neighbors + // ncount = # of my datums to send + // include nlocal datums with owner of each atom - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0]; - memory->create(buf,nbuf,"special:buf"); + ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][0]*(nspecial[i][0]-1); - // fill buffer with: - // (1) = counter for 1-3 neighbors, initialized to 0 - // (2) = # of 1-2 neighbors - // (3:N) = list of 1-2 neighbors + memory->create(proclist,ncount,"special:proclist"); + inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - size = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][0]; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; + proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; + + for (j = 0; j < nspecial[i][0]; j++) { + proc = hashlittle(&onetwo[i][j],sizeof(tagint),0) % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = onetwo[i][j]; + inbuf[m].partnerID = onetwo[i][k]; + m++; + } + } } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - // subtracting one since my list will contain original atom + // perform rendezvous operation + // each proc owns random subset of atoms, receives all their bond partners - comm->ring(size,sizeof(tagint),buf,3,ring_three,buf,(void *)this); + nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + outbuf = (OutRvous *) buf; - // extract count from buffer that has cycled back to me - // nspecial[i][1] = # of 1-3 neighbors of atom i + memory->destroy(proclist); + memory->sfree(inbuf); - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][1] = buf[j]; - j += 2 + nspecial[i][0]; + // set nspecial[1] and onethree for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onethree,nlocal,maxall,"special:onethree"); + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; } - memory->destroy(buf); + memory->destroy(outbuf); - // ---------------------------------------------------- - // create onethree[i] = list of 1-3 neighbors for atom i - // ---------------------------------------------------- - - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + // compute and print max # of 1-3 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-3 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-3 neighbors\n",maxall); } - memory->create(onethree,nlocal,maxall,"special:onethree"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = global tag of original atom - // (2) = # of 1-2 neighbors - // (3) = # of 1-3 neighbors - // (4) = counter for 1-3 neighbors, initialized to 0 - // (5:N) = list of 1-2 neighbors - // (N+1:2N) space for list of 1-3 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = tag[i]; - buf[size++] = nspecial[i][0]; - buf[size++] = nspecial[i][1]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; - size += nspecial[i][1]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, add its neighbors to 1-3 list - // increment the count in buf(i+4) - // exclude the atom whose tag = original - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,4,ring_four,buf,(void *)this); - - // fill onethree with buffer values that have been returned to me - // sanity check: accumulated buf[i+3] count should equal - // nspecial[i][1] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+3] != nspecial[i][1]) - error->one(FLERR,"1-3 bond count is inconsistent"); - j += 4 + nspecial[i][0]; - for (k = 0; k < nspecial[i][1]; k++) - onethree[i][k] = buf[j++]; - } - - memory->destroy(buf); - // done if special_bond weights for 1-4 are set to 1.0 if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { @@ -304,117 +264,92 @@ void Special::build() if (force->special_angle) angle_trim(); combine(); fix_alteration(); + timer_output(time1); return; } // ----------------------------------------------------- // compute nspecial[i][2] = # of 1-4 neighbors of atom i + // create onefour[i] = list of 1-4 neighbors for atom i // ----------------------------------------------------- - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-3 neighbors + // ncount = # of my datums to send + // include nlocal datums with owner of each atom - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); + ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]*nspecial[i][0]; - // fill buffer with: - // (1) = counter for 1-4 neighbors, initialized to 0 - // (2) = # of 1-3 neighbors - // (3:N) = list of 1-3 neighbors + memory->create(proclist,ncount,"special:proclist"); + inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - size = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][1]; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; + proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; + + for (j = 0; j < nspecial[i][1]; j++) { + proc = hashlittle(&onethree[i][j],sizeof(tagint),0) % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = onethree[i][j]; + inbuf[m].partnerID = onetwo[i][k]; + m++; + } + } } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, increment 1-4 count by # of 1-2 neighbors of my atom - // may include duplicates and original atom but they will be culled later + // perform rendezvous operation + // each proc owns random subset of bodies, receives all atoms in the bodies + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body - comm->ring(size,sizeof(tagint),buf,5,ring_five,buf,(void *)this); + nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + outbuf = (OutRvous *) buf; - // extract count from buffer that has cycled back to me - // nspecial[i][2] = # of 1-4 neighbors of atom i + memory->destroy(proclist); + memory->sfree(inbuf); - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][2] = buf[j]; - j += 2 + nspecial[i][1]; + // set nspecial[2] and onefour for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onefour,nlocal,maxall,"special:onefour"); + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; } - memory->destroy(buf); + memory->destroy(outbuf); - // ---------------------------------------------------- - // create onefour[i] = list of 1-4 neighbors for atom i - // ---------------------------------------------------- - - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + // compute and print max # of 1-4 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-4 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-4 neighbors\n",maxall); } - memory->create(onefour,nlocal,maxall,"special:onefour"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) - nbuf += 3 + nspecial[i][1] + nspecial[i][2]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = # of 1-3 neighbors - // (2) = # of 1-4 neighbors - // (3) = counter for 1-4 neighbors, initialized to 0 - // (4:N) = list of 1-3 neighbors - // (N+1:2N) space for list of 1-4 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = nspecial[i][1]; - buf[size++] = nspecial[i][2]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; - size += nspecial[i][2]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, add its neighbors to 1-4 list - // incrementing the count in buf(i+4) - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,6,ring_six,buf,(void *)this); - - // fill onefour with buffer values that have been returned to me - // sanity check: accumulated buf[i+2] count should equal - // nspecial[i][2] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+2] != nspecial[i][2]) - error->one(FLERR,"1-4 bond count is inconsistent"); - j += 3 + nspecial[i][1]; - for (k = 0; k < nspecial[i][2]; k++) - onefour[i][k] = buf[j++]; - } - - memory->destroy(buf); + // finish processing the onetwo, onethree, onefour lists dedup(); if (force->special_angle) angle_trim(); if (force->special_dihedral) dihedral_trim(); combine(); fix_alteration(); + timer_output(time1); } /* ---------------------------------------------------------------------- @@ -663,17 +598,19 @@ void Special::combine() void Special::angle_trim() { - int i,j,m,n; + int i,j,m,n,proc,index; int *num_angle = atom->num_angle; int *num_dihedral = atom->num_dihedral; tagint **angle_atom1 = atom->angle_atom1; + tagint **angle_atom2 = atom->angle_atom2; tagint **angle_atom3 = atom->angle_atom3; tagint **dihedral_atom1 = atom->dihedral_atom1; tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom3 = atom->dihedral_atom3; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-3 neighbor counts @@ -697,68 +634,125 @@ void Special::angle_trim() if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) { - // dflag = flag for 1-3 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // ncount = # of my datums to send in 3 parts for each owned atom + // proc owner, onethree list, angle end points + // angle end points are from angle list and 1-3 and 2-4 pairs in dihedrals + // latter is only for angles or dihedrlas where I own atom2 + int ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]; for (i = 0; i < nlocal; i++) { - n = nspecial[i][1]; - for (j = 0; j < n; j++) dflag[i][j] = 0; + for (j = 0; j < num_angle[i]; j++) { + index = atom->map(angle_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 2; + } + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 4; + } } - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,3 atoms in each angle stored by atom - // and list of 1,3 and 2,4 atoms in each dihedral stored by atom + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - int nbuf = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // sent to owner of atomID + // one datum for each 1-4 partner: datum = atomID, ID + // sent to owner of atomID + // two datums for each dihedral 1-4 endatoms : datum = atomID, ID + // sent to owner of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - if (num_angle && atom->nangles) nbuf += 2*num_angle[i]; - if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i]; - } - int *buf; - memory->create(buf,nbuf,"special:buf"); + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - // fill buffer with list of 1,3 atoms in each angle - // and with list of 1,3 and 2,4 atoms in each dihedral + for (j = 0; j < nspecial[i][1]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = onethree[i][j]; + m++; + } - int size = 0; - if (num_angle && atom->nangles) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_angle[i]; j++) { - buf[size++] = angle_atom1[i][j]; - buf[size++] = angle_atom3[i][j]; - } + for (j = 0; j < num_angle[i]; j++) { + index = atom->map(angle_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - if (num_dihedral && atom->ndihedrals) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom3[i][j]; - buf[size++] = dihedral_atom2[i][j]; - buf[size++] = dihedral_atom4[i][j]; - } + proclist[m] = hashlittle(&angle_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = angle_atom1[i][j]; + inbuf[m].partnerID = angle_atom3[i][j]; + m++; - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,3 atoms looking for atoms I own - // when find one, scan its 1-3 neigh list and mark I,J as in an angle + proclist[m] = hashlittle(&angle_atom3[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = angle_atom3[i][j]; + inbuf[m].partnerID = angle_atom1[i][j]; + m++; + } - comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL,(void *)this); + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - // delete 1-3 neighbors if they are not flagged in dflag + proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom1[i][j]; + inbuf[m].partnerID = dihedral_atom3[i][j]; + m++; - for (i = 0; i < nlocal; i++) { - m = 0; - for (j = 0; j < nspecial[i][1]; j++) - if (dflag[i][j]) onethree[i][m++] = onethree[i][j]; - nspecial[i][1] = m; + proclist[m] = hashlittle(&dihedral_atom2[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom2[i][j]; + inbuf[m].partnerID = dihedral_atom4[i][j]; + m++; + + proclist[m] = hashlittle(&dihedral_atom3[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom3[i][j]; + inbuf[m].partnerID = dihedral_atom1[i][j]; + m++; + + proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom4[i][j]; + inbuf[m].partnerID = dihedral_atom2[i][j]; + m++; + } } - // clean up + // perform rendezvous operation + // each proc owns random subset of atoms + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_trim, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - memory->destroy(dflag); - memory->destroy(buf); + memory->destroy(proclist); + memory->sfree(inbuf); + + // reset nspecial[1] and onethree for all owned atoms based on output info + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; + } + + memory->destroy(outbuf); // if no angles or dihedrals are defined, delete all 1-3 neighs @@ -789,12 +783,14 @@ void Special::angle_trim() void Special::dihedral_trim() { - int i,j,m,n; + int i,j,m,n,proc,index; int *num_dihedral = atom->num_dihedral; tagint **dihedral_atom1 = atom->dihedral_atom1; + tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-4 neighbor counts @@ -813,57 +809,95 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral + // if dihedrals are defined, rendezvous onefour list with dihedral 1-4 pairs if (num_dihedral && atom->ndihedrals) { - // dflag = flag for 1-4 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // ncount = # of my datums to send in 3 parts for each owned atom + // onefour list, proc owner, dihedral end points + // latter is only for dihedrals where I own atom2 + int ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][2]; for (i = 0; i < nlocal; i++) { - n = nspecial[i][2]; - for (j = 0; j < n; j++) dflag[i][j] = 0; + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 2; + } } - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,4 atoms in each dihedral stored by atom + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - int nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i]; - int *buf; - memory->create(buf,nbuf,"special:buf"); + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // sent to owner of atomID + // one datum for each 1-4 partner: datum = atomID, ID + // sent to owner of atomID + // two datums for each dihedral 1-4 endatoms : datum = atomID, ID + // sent to owner of atomID - // fill buffer with list of 1,4 atoms in each dihedral + m = 0; + for (i = 0; i < nlocal; i++) { + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - int size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom4[i][j]; + for (j = 0; j < nspecial[i][2]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = onefour[i][j]; + m++; } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,4 atoms looking for atoms I own - // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL,(void *)this); + proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom1[i][j]; + inbuf[m].partnerID = dihedral_atom4[i][j]; + m++; - // delete 1-4 neighbors if they are not flagged in dflag - - for (i = 0; i < nlocal; i++) { - m = 0; - for (j = 0; j < nspecial[i][2]; j++) - if (dflag[i][j]) onefour[i][m++] = onefour[i][j]; - nspecial[i][2] = m; + proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom4[i][j]; + inbuf[m].partnerID = dihedral_atom1[i][j]; + m++; + } } - // clean up + // perform rendezvous operation + // each proc owns random subset of atoms + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_trim, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - memory->destroy(dflag); - memory->destroy(buf); + memory->destroy(proclist); + memory->sfree(inbuf); + + // reset nspecial[2] and onefour for all owned atoms based on output info + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; + } + + memory->destroy(outbuf); // if no dihedrals are defined, delete all 1-4 neighs @@ -888,262 +922,213 @@ void Special::dihedral_trim() } /* ---------------------------------------------------------------------- - when receive buffer, scan tags for atoms I own - when find one, increment nspecial count for that atom + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N InRvous datums + create outbuf = list of Nout OutRvous datums ------------------------------------------------------------------------- */ -void Special::ring_one(int ndatum, char *cbuf, void *ptr) +int Special::rendezvous_1234(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) { + int i,j,m; + Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + Memory *memory = sptr->memory; - tagint *buf = (tagint *) cbuf; - int m; + // setup hash + // ncount = number of atoms assigned to me + // key = atom ID + // value = index into Ncount-length data structure - for (int i = 0; i < ndatum; i++) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) nspecial[m][0]++; + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) + if (in[i].me >= 0) + hash[in[i].atomID] = ncount++; + + // procowner = caller proc that owns each atom + // atomID = ID of each rendezvous atom I own + + int *procowner,*npartner; + tagint *atomID; + memory->create(procowner,ncount,"special:procowner"); + memory->create(atomID,ncount,"special:atomID"); + memory->create(npartner,ncount,"special:npartner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].atomID)->second; + if (in[i].me >= 0) { + procowner[m] = in[i].me; + atomID[m] = in[i].atomID; + } else npartner[m]++; } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan 2nd-atom tags for atoms I own - when find one, add 1st-atom tag to onetwo list for 2nd atom -------------------------------------------------------------------------- */ + int max = 0; + for (m = 0; m < ncount; m++) + max = MAX(max,npartner[m]); + sptr->max_rvous = max; -void Special::ring_two(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int nlocal = atom->nlocal; + int **partner; + memory->create(partner,ncount,max,"special:partner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; - tagint **onetwo = sptr->onetwo; - int *count = sptr->count; - - tagint *buf = (tagint *) cbuf; - int m; - - for (int i = 1; i < ndatum; i += 2) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1]; + for (i = 0; i < n; i++) { + if (in[i].me >= 0) continue; + m = hash.find(in[i].atomID)->second; + partner[m][npartner[m]++] = in[i].partnerID; } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - subtracting one since my list will contain original atom -------------------------------------------------------------------------- */ + // pass list of OutRvous datums back to comm->rendezvous -void Special::ring_three(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + int nout = 0; + for (m = 0; m < ncount; m++) nout += npartner[m]; - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num12; + memory->create(proclist,nout,"special:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); - i = 0; - while (i < ndatum) { - n = buf[i]; - num12 = buf[i+1]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) - n += nspecial[m][0] - 1; + nout = 0; + for (m = 0; m < ncount; m++) + for (j = 0; j < npartner[m]; j++) { + proclist[nout] = procowner[m]; + out[nout].atomID = atomID[m]; + out[nout].partnerID = partner[m][j]; + nout++; } - buf[i] = n; - i += 2 + num12; - } + + outbuf = (char *) out; + + // clean up + // Comm::rendezvous will delete proclist and out (outbuf) + + memory->destroy(procowner); + memory->destroy(atomID); + memory->destroy(npartner); + memory->destroy(partner); + + return nout; } /* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, add its neighbors to 1-3 list - increment the count in buf(i+4) - exclude the atom whose tag = original - this process may include duplicates but they will be culled later + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N InRvous datums + create outbuf = list of Nout OutRvous datums ------------------------------------------------------------------------- */ -void Special::ring_four(int ndatum, char *cbuf, void *ptr) +int Special::rendezvous_trim(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) { + int i,j,m; + Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + Memory *memory = sptr->memory; - tagint **onetwo = sptr->onetwo; + // setup hash + // ncount = number of atoms assigned to me + // key = atom ID + // value = index into Ncount-length data structure - tagint *buf = (tagint *) cbuf; - tagint original; - int i,j,k,m,n,num12,num13; + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) + if (in[i].me >= 0) + hash[in[i].atomID] = ncount++; - i = 0; - while (i < ndatum) { - original = buf[i]; - num12 = buf[i+1]; - num13 = buf[i+2]; - n = buf[i+3]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+4+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - if (onetwo[m][k] != original) - buf[i+4+num12+(n++)] = onetwo[m][k]; + // procowner = caller proc that owns each atom + // atomID = ID of each rendezvous atom I own + // npartner = # of 1-3 partners for each atom I own + + int *procowner,*npartner; + tagint *atomID; + memory->create(procowner,ncount,"special:procowner"); + memory->create(atomID,ncount,"special:atomID"); + memory->create(npartner,ncount,"special:npartner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].atomID)->second; + if (in[i].me >= 0) { + procowner[m] = in[i].me; + atomID[m] = in[i].atomID; + } else if (in[i].me == -1) npartner[m]++; + } + + int max = 0; + for (m = 0; m < ncount; m++) max = MAX(max,npartner[m]); + + // partner = list of 1-3 or 1-4 partners for each atom I own + + int **partner; + memory->create(partner,ncount,max,"special:partner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + if (in[i].me >= 0 || in[i].me == -2) continue; + m = hash.find(in[i].atomID)->second; + partner[m][npartner[m]++] = in[i].partnerID; + } + + // flag = 1 if partner is in an actual angle or in a dihedral + + int **flag; + memory->create(flag,ncount,max,"special:flag"); + + for (i = 0; i < ncount; i++) + for (j = 0; j < npartner[i]; j++) + flag[i][j] = 0; + + tagint actual; + for (i = 0; i < n; i++) { + if (in[i].me != -2) continue; + actual = in[i].partnerID; + m = hash.find(in[i].atomID)->second; + for (j = 0; j < npartner[m]; j++) + if (partner[m][j] == actual) { + flag[m][j] = 1; + break; + } + } + + // pass list of OutRvous datums back to comm->rendezvous + + int nout = 0; + for (m = 0; m < ncount; m++) nout += npartner[m]; + + memory->create(proclist,nout,"special:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); + + nout = 0; + for (m = 0; m < ncount; m++) + for (j = 0; j < npartner[m]; j++) { + if (flag[m][j] == 0) continue; + proclist[nout] = procowner[m]; + out[nout].atomID = atomID[m]; + out[nout].partnerID = partner[m][j]; + nout++; } - buf[i+3] = n; - i += 4 + num12 + num13; - } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, increment 1-4 count by # of 1-2 neighbors of my atom - may include duplicates and original atom but they will be culled later -------------------------------------------------------------------------- */ + outbuf = (char *) out; -void Special::ring_five(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + // clean up + // Comm::rendezvous will delete proclist and out (outbuf) - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num13; + memory->destroy(procowner); + memory->destroy(atomID); + memory->destroy(npartner); + memory->destroy(partner); + memory->destroy(flag); - i = 0; - while (i < ndatum) { - n = buf[i]; - num13 = buf[i+1]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) n += nspecial[m][0]; - } - buf[i] = n; - i += 2 + num13; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, add its neighbors to 1-4 list - incrementing the count in buf(i+4) - this process may include duplicates but they will be culled later -------------------------------------------------------------------------- */ - -void Special::ring_six(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onetwo = sptr->onetwo; - - tagint *buf = (tagint *) cbuf; - int i,j,k,m,n,num13,num14; - - i = 0; - while (i < ndatum) { - num13 = buf[i]; - num14 = buf[i+1]; - n = buf[i+2]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+3+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - buf[i+3+num13+(n++)] = onetwo[m][k]; - } - buf[i+2] = n; - i += 3 + num13 + num14; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,3 atoms looking for atoms I own - when find one, scan its 1-3 neigh list and mark I,J as in an angle -------------------------------------------------------------------------- */ - -void Special::ring_seven(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onethree = sptr->onethree; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][1]; m++) - if (jglobal == onethree[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][1]; m++) - if (iglobal == onethree[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,4 atoms looking for atoms I own - when find one, scan its 1-4 neigh list and mark I,J as in a dihedral -------------------------------------------------------------------------- */ - -void Special::ring_eight(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onefour = sptr->onefour; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][2]; m++) - if (jglobal == onefour[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][2]; m++) - if (iglobal == onefour[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } + return nout; } /* ---------------------------------------------------------------------- @@ -1159,3 +1144,15 @@ void Special::fix_alteration() modify->fix[ifix]->rebuild_special(); } +/* ---------------------------------------------------------------------- + print timing output +------------------------------------------------------------------------- */ + +void Special::timer_output(double time1) +{ + double t2 = MPI_Wtime(); + if (comm->me == 0) { + if (screen) fprintf(screen," special bonds CPU = %g secs\n",t2-t1); + if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",t2-t1); + } +} diff --git a/src/special.h b/src/special.h index 9f25200336..f7892075ac 100644 --- a/src/special.h +++ b/src/special.h @@ -28,27 +28,30 @@ class Special : protected Pointers { int me,nprocs; tagint **onetwo,**onethree,**onefour; - // data used by ring callback methods + // data used by rendezvous callback methods - int *count; - int **dflag; + int max_rvous; + + struct InRvous { + int me; + tagint atomID,partnerID; + }; + + struct OutRvous { + tagint atomID,partnerID; + }; void dedup(); void angle_trim(); void dihedral_trim(); void combine(); void fix_alteration(); + void timer_output(double); - // callback functions for ring communication + // callback function for rendezvous communication - static void ring_one(int, char *, void *); - static void ring_two(int, char *, void *); - static void ring_three(int, char *, void *); - static void ring_four(int, char *, void *); - static void ring_five(int, char *, void *); - static void ring_six(int, char *, void *); - static void ring_seven(int, char *, void *); - static void ring_eight(int, char *, void *); + static int rendezvous_1234(int, char *, int *&, char *&, void *); + static int rendezvous_trim(int, char *, int *&, char *&, void *); }; } From 2e0d69b005ef26ccf1fdf0bcc671bfd8a61fafea Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 30 Nov 2018 13:06:31 -0700 Subject: [PATCH 02/12] replace STL map with atom->map in special, better code comments --- src/RIGID/fix_rigid_small.cpp | 8 ++-- src/comm.cpp | 20 ++++++-- src/read_data.cpp | 8 ++-- src/read_restart.cpp | 4 +- src/special.cpp | 89 +++++++++++++++++++++++++---------- 5 files changed, 92 insertions(+), 37 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 4421d9ae17..fc51e0aa4f 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -1571,9 +1571,9 @@ void FixRigidSmall::create_bodies(tagint *bodyID) } // perform rendezvous operation - // each proc owns random subset of bodies, receives all atoms in the bodies - // func = compute bbox of each body, flag atom closest to geometric center - // when done: each atom has atom ID of owning atom of its body + // each proc owns random subset of bodies + // receives all atoms in those bodies + // func = compute bbox of each body, find atom closest to geometric center char *buf; int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), @@ -1627,6 +1627,8 @@ int FixRigidSmall::rendezvous_body(int n, char *inbuf, MPI_Comm world = frsptr->world; // setup hash + // use STL map instead of atom->map + // b/c know nothing about body ID values specified by user // ncount = number of bodies assigned to me // key = body ID // value = index into Ncount-length data structure diff --git a/src/comm.cpp b/src/comm.cpp index 5fcfa141d0..6512b21fef 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -728,13 +728,27 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, /* ---------------------------------------------------------------------- rendezvous communication operation + three stages: + first Irregular converts inbuf from caller decomp to rvous decomp + callback operates on data in rendevous decomp + last Irregular converts outbuf from rvous decomp back to caller decomp + inputs: + n = # of input datums + proclist = proc that owns each input datum in rendezvous decomposition + inbuf = list of input datums + insize = size in bytes of each input datum + callback = caller function to invoke in rendezvous decomposition + outputs: + nout = # of output datums (function return) + outbuf = list of output datums + outsize = size in bytes of each output datum ------------------------------------------------------------------------- */ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, int (*callback)(int, char *, int *&, char *&, void *), char *&outbuf, int outsize, void *ptr) { - // comm data from caller decomposition to rendezvous decomposition + // comm inbuf from caller decomposition to rendezvous decomposition Irregular *irregular = new Irregular(lmp); @@ -747,7 +761,7 @@ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, delete irregular; // peform rendezvous computation via callback() - // callback() allocates proclist_rvous and outbuf_rvous + // callback() allocates/populates proclist_rvous and outbuf_rvous int *proclist_rvous; char *outbuf_rvous; @@ -757,7 +771,7 @@ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, memory->sfree(inbuf_rvous); - // comm data from rendezvous decomposition back to caller + // comm outbuf from rendezvous decomposition back to caller // caller will free outbuf irregular = new Irregular(lmp); diff --git a/src/read_data.cpp b/src/read_data.cpp index d7bd298e7b..cdeda03066 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -121,7 +121,7 @@ void ReadData::command(int narg, char **arg) if (narg < 1) error->all(FLERR,"Illegal read_data command"); MPI_Barrier(world); - double time1 = MPI_Wtime; + double time1 = MPI_Wtime(); // optional args @@ -912,13 +912,13 @@ void ReadData::command(int narg, char **arg) // total time MPI_Barrier(world); - double time2 = MPI_Wtime; + double time2 = MPI_Wtime(); if (comm->me == 0) { if (screen) - fprintf(screen," read_atoms CPU = %g secs\n",time2-time1); + fprintf(screen," read_data CPU = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," read_atoms CPU = %g secs\n",time2-time1); + fprintf(logfile," read_data CPU = %g secs\n",time2-time1); } } diff --git a/src/read_restart.cpp b/src/read_restart.cpp index a338a843d7..b61fb2278f 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -82,7 +82,7 @@ void ReadRestart::command(int narg, char **arg) error->all(FLERR,"Cannot read_restart after simulation box is defined"); MPI_Barrier(world); - double time1 = MPI_Wtime; + double time1 = MPI_Wtime(); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -569,7 +569,7 @@ void ReadRestart::command(int narg, char **arg) // total time MPI_Barrier(world); - double time2 = MPI_Wtime; + double time2 = MPI_Wtime(); if (comm->me == 0) { if (screen) diff --git a/src/special.cpp b/src/special.cpp index a18c597765..6dad30a9a7 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -108,9 +108,11 @@ void Special::build() memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); // setup input buf to rendezvous comm - // one datum for each owned atom: datum = proc, atomID - // one datum for each bond partner: datum = atomID, bond partner ID + // input datums = pairs of bonded atoms // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // add inverted datum when netwon_bond on m = 0; for (i = 0; i < nlocal; i++) { @@ -140,7 +142,8 @@ void Special::build() } // perform rendezvous operation - // each proc owns random subset of atoms, receives all their bond partners + // each proc owns random subset of atoms + // receives all info to form and return their onetwo lists int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), rendezvous_1234, @@ -151,6 +154,7 @@ void Special::build() memory->sfree(inbuf); // set nspecial[0] and onetwo for all owned atoms based on output info + // output datums = pairs of atoms that are 1-2 neighbors MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); memory->create(onetwo,nlocal,maxall,"special:onetwo"); @@ -200,9 +204,10 @@ void Special::build() memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); // setup input buf to rendezvous comm - // one datum for each owned atom: datum = proc, atomID - // one datum for each bond partner: datum = atomID, bond partner ID + // input datums = all pairs of onetwo atoms (they are 1-3 neighbors) // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each onetwo pair: datum = atomID1, atomID2 m = 0; for (i = 0; i < nlocal; i++) { @@ -226,7 +231,8 @@ void Special::build() } // perform rendezvous operation - // each proc owns random subset of atoms, receives all their bond partners + // each proc owns random subset of atoms + // receives all info to form and return their onethree lists nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), rendezvous_1234, @@ -237,6 +243,7 @@ void Special::build() memory->sfree(inbuf); // set nspecial[1] and onethree for all owned atoms based on output info + // output datums = pairs of atoms that are 1-3 neighbors MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); memory->create(onethree,nlocal,maxall,"special:onethree"); @@ -284,9 +291,10 @@ void Special::build() memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); // setup input buf to rendezvous comm - // one datum for each owned atom: datum = proc, atomID - // one datum for each partner: datum = atomID, bond partner ID + // input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors) // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each onethree/onetwo pair: datum = atomID1, atomID2 m = 0; for (i = 0; i < nlocal; i++) { @@ -309,9 +317,8 @@ void Special::build() } // perform rendezvous operation - // each proc owns random subset of bodies, receives all atoms in the bodies - // func = compute bbox of each body, flag atom closest to geometric center - // when done: each atom has atom ID of owning atom of its body + // each proc owns random subset of atoms + // receives all info to form and return their onefour lists nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), rendezvous_1234, @@ -322,6 +329,7 @@ void Special::build() memory->sfree(inbuf); // set nspecial[2] and onefour for all owned atoms based on output info + // output datums = pairs of atoms that are 1-4 neighbors MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); memory->create(onefour,nlocal,maxall,"special:onefour"); @@ -934,21 +942,28 @@ int Special::rendezvous_1234(int n, char *inbuf, int i,j,m; Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; Memory *memory = sptr->memory; - // setup hash + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts + + atom->map_clear(); + + // initialize hash // ncount = number of atoms assigned to me // key = atom ID // value = index into Ncount-length data structure InRvous *in = (InRvous *) inbuf; - std::map hash; + //std::map hash; tagint id; int ncount = 0; for (i = 0; i < n; i++) if (in[i].me >= 0) - hash[in[i].atomID] = ncount++; + //hash[in[i].atomID] = ncount++; + atom->map_one(in[i].atomID,ncount++); // procowner = caller proc that owns each atom // atomID = ID of each rendezvous atom I own @@ -961,7 +976,8 @@ int Special::rendezvous_1234(int n, char *inbuf, for (m = 0; m < ncount; m++) npartner[m] = 0; for (i = 0; i < n; i++) { - m = hash.find(in[i].atomID)->second; + //m = hash.find(in[i].atomID)->second; + m = atom->map(in[i].atomID); if (in[i].me >= 0) { procowner[m] = in[i].me; atomID[m] = in[i].atomID; @@ -979,7 +995,8 @@ int Special::rendezvous_1234(int n, char *inbuf, for (i = 0; i < n; i++) { if (in[i].me >= 0) continue; - m = hash.find(in[i].atomID)->second; + //m = hash.find(in[i].atomID)->second; + m = atom->map(in[i].atomID); partner[m][npartner[m]++] = in[i].partnerID; } @@ -1011,6 +1028,12 @@ int Special::rendezvous_1234(int n, char *inbuf, memory->destroy(npartner); memory->destroy(partner); + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + return nout; } @@ -1027,21 +1050,28 @@ int Special::rendezvous_trim(int n, char *inbuf, int i,j,m; Special *sptr = (Special *) ptr; + Atom *atom = sptr->atom; Memory *memory = sptr->memory; - // setup hash + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts + + atom->map_clear(); + + // initialize hash // ncount = number of atoms assigned to me // key = atom ID // value = index into Ncount-length data structure InRvous *in = (InRvous *) inbuf; - std::map hash; + //std::map hash; tagint id; int ncount = 0; for (i = 0; i < n; i++) if (in[i].me >= 0) - hash[in[i].atomID] = ncount++; + //hash[in[i].atomID] = ncount++; + atom->map_one(in[i].atomID,ncount++); // procowner = caller proc that owns each atom // atomID = ID of each rendezvous atom I own @@ -1055,7 +1085,8 @@ int Special::rendezvous_trim(int n, char *inbuf, for (m = 0; m < ncount; m++) npartner[m] = 0; for (i = 0; i < n; i++) { - m = hash.find(in[i].atomID)->second; + //m = hash.find(in[i].atomID)->second; + m = atom->map(in[i].atomID); if (in[i].me >= 0) { procowner[m] = in[i].me; atomID[m] = in[i].atomID; @@ -1073,7 +1104,8 @@ int Special::rendezvous_trim(int n, char *inbuf, for (i = 0; i < n; i++) { if (in[i].me >= 0 || in[i].me == -2) continue; - m = hash.find(in[i].atomID)->second; + //m = hash.find(in[i].atomID)->second; + m = atom->map(in[i].atomID); partner[m][npartner[m]++] = in[i].partnerID; } @@ -1090,7 +1122,8 @@ int Special::rendezvous_trim(int n, char *inbuf, for (i = 0; i < n; i++) { if (in[i].me != -2) continue; actual = in[i].partnerID; - m = hash.find(in[i].atomID)->second; + //m = hash.find(in[i].atomID)->second; + m = atom->map(in[i].atomID); for (j = 0; j < npartner[m]; j++) if (partner[m][j] == actual) { flag[m][j] = 1; @@ -1128,6 +1161,12 @@ int Special::rendezvous_trim(int n, char *inbuf, memory->destroy(partner); memory->destroy(flag); + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + return nout; } @@ -1150,9 +1189,9 @@ void Special::fix_alteration() void Special::timer_output(double time1) { - double t2 = MPI_Wtime(); + double time2 = MPI_Wtime(); if (comm->me == 0) { - if (screen) fprintf(screen," special bonds CPU = %g secs\n",t2-t1); - if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",t2-t1); + if (screen) fprintf(screen," special bonds CPU = %g secs\n",time2-time1); + if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",time2-time1); } } From ece1aff7e9c33b7145973880677e3fbdb3b5e37a Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 7 Dec 2018 15:46:27 -0700 Subject: [PATCH 03/12] less comm version of special bonds rendezvous --- src/comm.cpp | 8 +- src/comm.h | 2 +- src/special.cpp | 834 ++++++++++++++++++++++++++++-------------------- src/special.h | 26 +- 4 files changed, 517 insertions(+), 353 deletions(-) diff --git a/src/comm.cpp b/src/comm.cpp index 6512b21fef..9bdaf0798a 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -745,7 +745,7 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, ------------------------------------------------------------------------- */ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, - int (*callback)(int, char *, int *&, char *&, void *), + int (*callback)(int, char *, int &, int *&, char *&, void *), char *&outbuf, int outsize, void *ptr) { // comm inbuf from caller decomposition to rendezvous decomposition @@ -763,13 +763,15 @@ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, // peform rendezvous computation via callback() // callback() allocates/populates proclist_rvous and outbuf_rvous + int flag; int *proclist_rvous; char *outbuf_rvous; int nout_rvous = - callback(n_rvous,inbuf_rvous,proclist_rvous,outbuf_rvous,ptr); + callback(n_rvous,inbuf_rvous,flag,proclist_rvous,outbuf_rvous,ptr); - memory->sfree(inbuf_rvous); + if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous + if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular // comm outbuf from rendezvous decomposition back to caller // caller will free outbuf diff --git a/src/comm.h b/src/comm.h index 8bb057a0c1..a1bac53ac8 100644 --- a/src/comm.h +++ b/src/comm.h @@ -110,7 +110,7 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); int rendezvous(int, int *, char *, int, - int (*)(int, char *, int *&, char *&, void *), + int (*)(int, char *, int &, int *&, char *&, void *), char *&, int, void *); int read_lines_from_file(FILE *, int, int, char *); diff --git a/src/special.cpp b/src/special.cpp index 6dad30a9a7..79d2f77e46 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -55,20 +55,9 @@ Special::~Special() void Special::build() { - int i,j,k,m,n,size,proc; - int max,maxall; - char *buf; - MPI_Barrier(world); double time1 = MPI_Wtime(); - int nlocal = atom->nlocal; - - tagint *tag = atom->tag; - int *num_bond = atom->num_bond; - tagint **bond_atom = atom->bond_atom; - int **nspecial = atom->nspecial; - if (me == 0 && screen) { const double * const special_lj = force->special_lj; const double * const special_coul = force->special_coul; @@ -81,183 +70,51 @@ void Special::build() // initialize nspecial counters to 0 - for (i = 0; i < nlocal; i++) { + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { nspecial[i][0] = 0; nspecial[i][1] = 0; nspecial[i][2] = 0; } - // ----------------------------------------------------- - // compute nspecial[i][0] = # of 1-2 neighbors of atom i + // setup atomIDs and procowner vectors in rendezvous decomposition + + atom_owners(); + + // tally nspecial[i][0] = # of 1-2 neighbors of atom i // create onetwo[i] = list of 1-2 neighbors for atom i - // ----------------------------------------------------- - // ncount = # of my datums to send (newton or newton off) - // include nlocal datums with owner of each atom + if (force->newton_bond) onetwo_build_newton(); + else onetwo_build_newton_off(); - int newton_bond = force->newton_bond; - - int ncount = 0; - for (i = 0; i < nlocal; i++) ncount += num_bond[i]; - if (newton_bond) ncount *= 2; - ncount += nlocal; - - int *proclist; - memory->create(proclist,ncount,"special:proclist"); - InRvous *inbuf = (InRvous *) - memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // input datums = pairs of bonded atoms - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each bond partner: datum = atomID, bond partner ID - // add inverted datum when netwon_bond on - - m = 0; - for (i = 0; i < nlocal; i++) { - proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; - proclist[m] = proc; - inbuf[m].me = me; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = 0; - m++; - - for (j = 0; j < num_bond[i]; j++) { - proclist[m] = proc; - inbuf[m].me = -1; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = bond_atom[i][j]; - m++; - } - if (newton_bond) { - for (j = 0; j < num_bond[i]; j++) { - proclist[m] = hashlittle(&bond_atom[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -1; - inbuf[m].atomID = bond_atom[i][j]; - inbuf[m].partnerID = tag[i]; - m++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onetwo lists - - int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_1234, - buf,sizeof(OutRvous),(void *) this); - OutRvous *outbuf = (OutRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[0] and onetwo for all owned atoms based on output info - // output datums = pairs of atoms that are 1-2 neighbors - - MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; - } - - memory->sfree(outbuf); - - // compute and print max # of 1-2 neighbors + // print max # of 1-2 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-2 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-2 neighbors\n",maxall); } - // ----------------------------------------------------- // done if special_bond weights for 1-3, 1-4 are set to 1.0 - // ----------------------------------------------------- if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0 && force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { dedup(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); timer_output(time1); return; } - // ----------------------------------------------------- - // compute nspecial[i][1] = # of 1-3 neighbors of atom i + // tally nspecial[i][1] = # of 1-3 neighbors of atom i // create onethree[i] = list of 1-3 neighbors for atom i - // ----------------------------------------------------- - // ncount = # of my datums to send - // include nlocal datums with owner of each atom + onethree_build(); - ncount = nlocal; - for (i = 0; i < nlocal; i++) ncount += nspecial[i][0]*(nspecial[i][0]-1); - - memory->create(proclist,ncount,"special:proclist"); - inbuf = (InRvous *) - memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // input datums = all pairs of onetwo atoms (they are 1-3 neighbors) - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each onetwo pair: datum = atomID1, atomID2 - - m = 0; - for (i = 0; i < nlocal; i++) { - proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; - inbuf[m].me = me; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = 0; - m++; - - for (j = 0; j < nspecial[i][0]; j++) { - proc = hashlittle(&onetwo[i][j],sizeof(tagint),0) % nprocs; - for (k = 0; k < nspecial[i][0]; k++) { - if (j == k) continue; - proclist[m] = proc; - inbuf[m].me = -1; - inbuf[m].atomID = onetwo[i][j]; - inbuf[m].partnerID = onetwo[i][k]; - m++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onethree lists - - nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_1234, - buf,sizeof(OutRvous),(void *) this); - outbuf = (OutRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[1] and onethree for all owned atoms based on output info - // output datums = pairs of atoms that are 1-3 neighbors - - MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onethree,nlocal,maxall,"special:onethree"); - - for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; - } - - memory->destroy(outbuf); - - // compute and print max # of 1-3 neighbors + // print max # of 1-3 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-3 neighbors\n",maxall); @@ -271,79 +128,18 @@ void Special::build() if (force->special_angle) angle_trim(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); timer_output(time1); return; } - // ----------------------------------------------------- - // compute nspecial[i][2] = # of 1-4 neighbors of atom i + // tally nspecial[i][2] = # of 1-4 neighbors of atom i // create onefour[i] = list of 1-4 neighbors for atom i - // ----------------------------------------------------- - // ncount = # of my datums to send - // include nlocal datums with owner of each atom + onefour_build(); - ncount = nlocal; - for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]*nspecial[i][0]; - - memory->create(proclist,ncount,"special:proclist"); - inbuf = (InRvous *) - memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - - // setup input buf to rendezvous comm - // input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors) - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each onethree/onetwo pair: datum = atomID1, atomID2 - - m = 0; - for (i = 0; i < nlocal; i++) { - proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; - inbuf[m].me = me; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = 0; - m++; - - for (j = 0; j < nspecial[i][1]; j++) { - proc = hashlittle(&onethree[i][j],sizeof(tagint),0) % nprocs; - for (k = 0; k < nspecial[i][0]; k++) { - proclist[m] = proc; - inbuf[m].me = -1; - inbuf[m].atomID = onethree[i][j]; - inbuf[m].partnerID = onetwo[i][k]; - m++; - } - } - } - - // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onefour lists - - nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_1234, - buf,sizeof(OutRvous),(void *) this); - outbuf = (OutRvous *) buf; - - memory->destroy(proclist); - memory->sfree(inbuf); - - // set nspecial[2] and onefour for all owned atoms based on output info - // output datums = pairs of atoms that are 1-4 neighbors - - MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); - memory->create(onefour,nlocal,maxall,"special:onefour"); - - for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; - - for (m = 0; m < nreturn; m++) { - i = atom->map(outbuf[m].atomID); - onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; - } - - memory->destroy(outbuf); - - // compute and print max # of 1-4 neighbors + // print max # of 1-4 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-4 neighbors\n",maxall); @@ -357,9 +153,380 @@ void Special::build() if (force->special_dihedral) dihedral_trim(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); + timer_output(time1); } +/* ---------------------------------------------------------------------- + setup atomIDs and procowner +------------------------------------------------------------------------- */ + +void Special::atom_owners() +{ + tagint *tag = atom->tag; + int nlocal = atom->nlocal; + + int *proclist; + memory->create(proclist,nlocal,"special:proclist"); + IDRvous *idbuf = (IDRvous *) + memory->smalloc((bigint) nlocal*sizeof(IDRvous),"special:idbuf"); + + // setup input buf to rendezvous comm + // input datums = pairs of bonded atoms + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // add inverted datum when netwon_bond on + + for (int i = 0; i < nlocal; i++) { + //proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[i] = tag[i] % nprocs; + idbuf[i].me = me; + idbuf[i].atomID = tag[i]; + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all info to form and return their onetwo lists + + char *buf; + comm->rendezvous(nlocal,proclist, + (char *) idbuf,sizeof(PairRvous), + rendezvous_ids,buf,sizeof(PairRvous), + (void *) this); + + memory->destroy(proclist); + memory->sfree(idbuf); +} + +/* ---------------------------------------------------------------------- + onetwo build +------------------------------------------------------------------------- */ + +void Special::onetwo_build_newton() +{ + int i,j,m; + + tagint *tag = atom->tag; + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // ncount = # of my datums to send + // include nlocal datums with owner of each atom + + int ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m < 0 || m >= nlocal) ncount++; + } + } + + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // input datums = pairs of bonded atoms + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // add inverted datum when netwon_bond on + + ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) continue; + proclist[ncount] = bond_atom[i][j] % nprocs; + inbuf[ncount].atomID = bond_atom[i][j]; + inbuf[ncount].partnerID = tag[i]; + ncount++; + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all info to form and return their onetwo lists + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist, + (char *) inbuf,sizeof(PairRvous), + rendezvous_1234,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[0] and onetwo for all owned atoms based on output info + // output datums = pairs of atoms that are 1-2 neighbors + + for (i = 0; i < nlocal; i++) { + nspecial[i][0] = num_bond[i]; + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][0]++; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][0]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][0]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + onetwo[i][nspecial[i][0]++] = bond_atom[i][j]; + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) onetwo[m][nspecial[m][0]++] = tag[i]; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + onetwo build with newton_bond flag off + no need for rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onetwo_build_newton_off() +{ +} + +/* ---------------------------------------------------------------------- + onetwo build with newton_bond flag off + no need for rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onethree_build() +{ + int i,j,k,m,proc; + + tagint *tag = atom->tag; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // ncount = # of my datums to send + // include nlocal datums with owner of each atom + + int ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m < 0 || m >= nlocal) ncount += nspecial[i][0]-1; + } + } + + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // input datums = all pairs of onetwo atoms (they are 1-3 neighbors) + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each onetwo pair: datum = atomID1, atomID2 + + ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m >= 0 && m < nlocal) continue; + proc = onetwo[i][j] % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + proclist[ncount] = proc; + inbuf[ncount].atomID = onetwo[i][j]; + inbuf[ncount].partnerID = onetwo[i][k]; + ncount++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all info to form and return their onethree lists + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist, + (char *) inbuf,sizeof(PairRvous), + rendezvous_1234,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[1] and onethree for all owned atoms based on output info + // output datums = pairs of atoms that are 1-3 neighbors + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][1] += nspecial[i][0]-1; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][1]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][1]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onethree,nlocal,maxall,"special:onethree"); + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m < 0 || m >= nlocal) continue; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + onethree[m][nspecial[m][1]++] = onetwo[i][k]; + } + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + remove duplicates within each of onetwo, onethree, onefour individually +------------------------------------------------------------------------- */ + +void Special::onefour_build() +{ + int i,j,k,m,proc; + + tagint *tag = atom->tag; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // ncount = # of my datums to send + // include nlocal datums with owner of each atom + + int ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m < 0 || m >= nlocal) ncount += nspecial[i][0]; + } + } + + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors) + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + // one datum for each onethree/onetwo pair: datum = atomID1, atomID2 + + ncount = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m >= 0 && m < nlocal) continue; + proc = onethree[i][j] % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + proclist[ncount] = proc; + inbuf[ncount].atomID = onethree[i][j]; + inbuf[ncount].partnerID = onetwo[i][k]; + ncount++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all info to form and return their onefour lists + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist, + (char *) inbuf,sizeof(PairRvous), + rendezvous_1234,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[2] and onefour for all owned atoms based on output info + // output datums = pairs of atoms that are 1-4 neighbors + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][2] += nspecial[i][0]; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][2]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][2]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onefour,nlocal,maxall,"special:onefour"); + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m < 0 || m >= nlocal) continue; + for (k = 0; k < nspecial[i][0]; k++) { + onefour[m][nspecial[m][2]++] = onetwo[i][k]; + } + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); +} + /* ---------------------------------------------------------------------- remove duplicates within each of onetwo, onethree, onefour individually ------------------------------------------------------------------------- */ @@ -662,8 +829,8 @@ void Special::angle_trim() int *proclist; memory->create(proclist,ncount,"special:proclist"); - InRvous *inbuf = (InRvous *) - memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm // one datum for each owned atom: datum = proc, atomID @@ -675,16 +842,9 @@ void Special::angle_trim() m = 0; for (i = 0; i < nlocal; i++) { - proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; - proclist[m] = proc; - inbuf[m].me = me; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = 0; - m++; - for (j = 0; j < nspecial[i][1]; j++) { proclist[m] = proc; - inbuf[m].me = -1; + //inbuf[m].me = -1; inbuf[m].atomID = tag[i]; inbuf[m].partnerID = onethree[i][j]; m++; @@ -695,13 +855,13 @@ void Special::angle_trim() if (index < 0 || index >= nlocal) continue; proclist[m] = hashlittle(&angle_atom1[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = angle_atom1[i][j]; inbuf[m].partnerID = angle_atom3[i][j]; m++; proclist[m] = hashlittle(&angle_atom3[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = angle_atom3[i][j]; inbuf[m].partnerID = angle_atom1[i][j]; m++; @@ -712,25 +872,25 @@ void Special::angle_trim() if (index < 0 || index >= nlocal) continue; proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom1[i][j]; inbuf[m].partnerID = dihedral_atom3[i][j]; m++; proclist[m] = hashlittle(&dihedral_atom2[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom2[i][j]; inbuf[m].partnerID = dihedral_atom4[i][j]; m++; proclist[m] = hashlittle(&dihedral_atom3[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom3[i][j]; inbuf[m].partnerID = dihedral_atom1[i][j]; m++; proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom4[i][j]; inbuf[m].partnerID = dihedral_atom2[i][j]; m++; @@ -743,10 +903,11 @@ void Special::angle_trim() // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_trim, - buf,sizeof(OutRvous),(void *) this); - OutRvous *outbuf = (OutRvous *) buf; + int nreturn = comm->rendezvous(ncount,proclist, + (char *) inbuf,sizeof(PairRvous), + rendezvous_trim,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); @@ -836,8 +997,8 @@ void Special::dihedral_trim() int *proclist; memory->create(proclist,ncount,"special:proclist"); - InRvous *inbuf = (InRvous *) - memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm // one datum for each owned atom: datum = proc, atomID @@ -851,14 +1012,14 @@ void Special::dihedral_trim() for (i = 0; i < nlocal; i++) { proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; proclist[m] = proc; - inbuf[m].me = me; + //inbuf[m].me = me; inbuf[m].atomID = tag[i]; inbuf[m].partnerID = 0; m++; for (j = 0; j < nspecial[i][2]; j++) { proclist[m] = proc; - inbuf[m].me = -1; + //inbuf[m].me = -1; inbuf[m].atomID = tag[i]; inbuf[m].partnerID = onefour[i][j]; m++; @@ -869,13 +1030,13 @@ void Special::dihedral_trim() if (index < 0 || index >= nlocal) continue; proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom1[i][j]; inbuf[m].partnerID = dihedral_atom4[i][j]; m++; proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; - inbuf[m].me = -2; + //inbuf[m].me = -2; inbuf[m].atomID = dihedral_atom4[i][j]; inbuf[m].partnerID = dihedral_atom1[i][j]; m++; @@ -888,10 +1049,11 @@ void Special::dihedral_trim() // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_trim, - buf,sizeof(OutRvous),(void *) this); - OutRvous *outbuf = (OutRvous *) buf; + int nreturn = comm->rendezvous(ncount,proclist, + (char *) inbuf,sizeof(PairRvous), + rendezvous_trim,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); @@ -931,16 +1093,55 @@ void Special::dihedral_trim() /* ---------------------------------------------------------------------- process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N InRvous datums - create outbuf = list of Nout OutRvous datums + inbuf = list of N PairRvous datums + outbuf = empty +------------------------------------------------------------------------- */ + +int Special::rendezvous_ids(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + Special *sptr = (Special *) ptr; + Memory *memory = sptr->memory; + + int *procowner; + tagint *atomIDs; + + memory->create(procowner,n,"special:procowner"); + memory->create(atomIDs,n,"special:atomIDs"); + // NOTE: when to free these vectors + + IDRvous *in = (IDRvous *) inbuf; + + for (int i = 0; i < n; i++) { + procowner[i] = in[i].me; + atomIDs[i] = in[i].atomID; + } + + // store rendezvous data in Special class + + sptr->ncount = n; + sptr->procowner = procowner; + sptr->atomIDs = atomIDs; + + proclist = NULL; + outbuf = NULL; + + flag = 0; + return 0; +} + + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs ------------------------------------------------------------------------- */ int Special::rendezvous_1234(int n, char *inbuf, - int *&proclist, char *&outbuf, + int &flag, int *&proclist, char *&outbuf, void *ptr) { - int i,j,m; - Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; Memory *memory = sptr->memory; @@ -950,105 +1151,52 @@ int Special::rendezvous_1234(int n, char *inbuf, atom->map_clear(); - // initialize hash - // ncount = number of atoms assigned to me - // key = atom ID - // value = index into Ncount-length data structure - - InRvous *in = (InRvous *) inbuf; - //std::map hash; - tagint id; + // hash atom IDs stored in rendezvous decomposition - int ncount = 0; - for (i = 0; i < n; i++) - if (in[i].me >= 0) - //hash[in[i].atomID] = ncount++; - atom->map_one(in[i].atomID,ncount++); + int ncount = sptr->ncount; + tagint *atomIDs = sptr->atomIDs; - // procowner = caller proc that owns each atom - // atomID = ID of each rendezvous atom I own + for (int i = 0; i < ncount; i++) + atom->map_one(atomIDs[i],i); - int *procowner,*npartner; - tagint *atomID; - memory->create(procowner,ncount,"special:procowner"); - memory->create(atomID,ncount,"special:atomID"); - memory->create(npartner,ncount,"special:npartner"); - for (m = 0; m < ncount; m++) npartner[m] = 0; + // proclist = owner of atomID in caller decomposition + + PairRvous *in = (PairRvous *) inbuf; + int *procowner = sptr->procowner; + memory->create(proclist,n,"special:proclist"); - for (i = 0; i < n; i++) { - //m = hash.find(in[i].atomID)->second; + int m; + for (int i = 0; i < n; i++) { m = atom->map(in[i].atomID); - if (in[i].me >= 0) { - procowner[m] = in[i].me; - atomID[m] = in[i].atomID; - } else npartner[m]++; + proclist[i] = procowner[m]; } - int max = 0; - for (m = 0; m < ncount; m++) - max = MAX(max,npartner[m]); - sptr->max_rvous = max; - - int **partner; - memory->create(partner,ncount,max,"special:partner"); - for (m = 0; m < ncount; m++) npartner[m] = 0; - - for (i = 0; i < n; i++) { - if (in[i].me >= 0) continue; - //m = hash.find(in[i].atomID)->second; - m = atom->map(in[i].atomID); - partner[m][npartner[m]++] = in[i].partnerID; - } - - // pass list of OutRvous datums back to comm->rendezvous - - int nout = 0; - for (m = 0; m < ncount; m++) nout += npartner[m]; - - memory->create(proclist,nout,"special:proclist"); - OutRvous *out = (OutRvous *) - memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); - - nout = 0; - for (m = 0; m < ncount; m++) - for (j = 0; j < npartner[m]; j++) { - proclist[nout] = procowner[m]; - out[nout].atomID = atomID[m]; - out[nout].partnerID = partner[m][j]; - nout++; - } - - outbuf = (char *) out; - - // clean up - // Comm::rendezvous will delete proclist and out (outbuf) - - memory->destroy(procowner); - memory->destroy(atomID); - memory->destroy(npartner); - memory->destroy(partner); - + outbuf = inbuf; + // NOTE: set out = in flag + // re-create atom map atom->map_init(0); atom->nghost = 0; atom->map_set(); - return nout; + flag = 1; + return n; } /* ---------------------------------------------------------------------- process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N InRvous datums - create outbuf = list of Nout OutRvous datums + inbuf = list of N PairRvous datums + create outbuf = list of Nout PairRvous datums ------------------------------------------------------------------------- */ int Special::rendezvous_trim(int n, char *inbuf, - int *&proclist, char *&outbuf, + int &flag, int *&proclist, char *&outbuf, void *ptr) { int i,j,m; + /* Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; Memory *memory = sptr->memory; @@ -1063,7 +1211,7 @@ int Special::rendezvous_trim(int n, char *inbuf, // key = atom ID // value = index into Ncount-length data structure - InRvous *in = (InRvous *) inbuf; + PairRvous *in = (PairRvous *) inbuf; //std::map hash; tagint id; @@ -1131,14 +1279,14 @@ int Special::rendezvous_trim(int n, char *inbuf, } } - // pass list of OutRvous datums back to comm->rendezvous + // pass list of PairRvous datums back to comm->rendezvous int nout = 0; for (m = 0; m < ncount; m++) nout += npartner[m]; memory->create(proclist,nout,"special:proclist"); - OutRvous *out = (OutRvous *) - memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); + PairRvous *out = (PairRvous *) + memory->smalloc((bigint) nout*sizeof(PairRvous),"special:out"); nout = 0; for (m = 0; m < ncount; m++) @@ -1167,7 +1315,11 @@ int Special::rendezvous_trim(int n, char *inbuf, atom->nghost = 0; atom->map_set(); - return nout; + */ + + //return nout; + flag = 2; + return 0; } /* ---------------------------------------------------------------------- diff --git a/src/special.h b/src/special.h index f7892075ac..772ba613ac 100644 --- a/src/special.h +++ b/src/special.h @@ -26,20 +26,29 @@ class Special : protected Pointers { private: int me,nprocs; + int maxall; tagint **onetwo,**onethree,**onefour; // data used by rendezvous callback methods - int max_rvous; + int ncount; + tagint *atomIDs; + int *procowner; - struct InRvous { + struct IDRvous { int me; + tagint atomID; + }; + + struct PairRvous { tagint atomID,partnerID; }; - struct OutRvous { - tagint atomID,partnerID; - }; + void atom_owners(); + void onetwo_build_newton(); + void onetwo_build_newton_off(); + void onethree_build(); + void onefour_build(); void dedup(); void angle_trim(); @@ -48,10 +57,11 @@ class Special : protected Pointers { void fix_alteration(); void timer_output(double); - // callback function for rendezvous communication + // callback functions for rendezvous communication - static int rendezvous_1234(int, char *, int *&, char *&, void *); - static int rendezvous_trim(int, char *, int *&, char *&, void *); + static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); + static int rendezvous_1234(int, char *, int &, int *&, char *&, void *); + static int rendezvous_trim(int, char *, int &, int *&, char *&, void *); }; } From 7c3d6dc051976664acf315a321e2846c93d407e2 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 7 Dec 2018 16:43:01 -0700 Subject: [PATCH 04/12] propagate rendezvous changes to fix rigid/small --- src/RIGID/fix_rigid_small.cpp | 3 ++- src/RIGID/fix_rigid_small.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index fc51e0aa4f..3da516894c 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -1611,7 +1611,7 @@ void FixRigidSmall::create_bodies(tagint *bodyID) ------------------------------------------------------------------------- */ int FixRigidSmall::rendezvous_body(int n, char *inbuf, - int *&proclist, char *&outbuf, + int &rflag, int *&proclist, char *&outbuf, void *ptr) { int i,j,m; @@ -1749,6 +1749,7 @@ int FixRigidSmall::rendezvous_body(int n, char *inbuf, memory->destroy(iclose); memory->destroy(rsqclose); + rflag = 2; return nout; } diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index a820efcdea..f6ad1b7206 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -209,7 +209,7 @@ class FixRigidSmall : public Fix { // callback function for rendezvous communication - static int rendezvous_body(int, char *, int *&, char *&, void *); + static int rendezvous_body(int, char *, int &, int *&, char *&, void *); // debug From e538fd5c6d9594edc15554f8bded8cd9baa32704 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 12 Dec 2018 17:14:56 -0700 Subject: [PATCH 05/12] added rendezvous alg to fix shake --- src/RIGID/fix_rigid_small.cpp | 6 +- src/RIGID/fix_shake.cpp | 795 +++++++++++++++++++++++----------- src/RIGID/fix_shake.h | 43 +- src/special.cpp | 633 +++++++++++++-------------- src/special.h | 7 +- 5 files changed, 899 insertions(+), 585 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 3da516894c..e20c64487b 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -414,9 +414,9 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (comm->me == 0) { if (screen) - fprintf(screen," create_bodies CPU = %g secs\n",time2-time1); + fprintf(screen," create bodies CPU = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," create_bodies CPU = %g secs\n",time2-time1); + fprintf(logfile," create bodies CPU = %g secs\n",time2-time1); } // set nlocal_body and allocate bodies I own @@ -1749,6 +1749,8 @@ int FixRigidSmall::rendezvous_body(int n, char *inbuf, memory->destroy(iclose); memory->destroy(rsqclose); + // flag = 2: new outbuf + rflag = 2; return nout; } diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index e0d1bf132b..66c92d42c5 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -219,8 +219,19 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : // identify all SHAKE clusters + double time1 = MPI_Wtime(); + find_clusters(); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," find clusters CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," find clusters CPU = %g secs\n",time2-time1); + } + // initialize list of SHAKE clusters to constrain maxlist = 0; @@ -707,13 +718,6 @@ void FixShake::find_clusters() int nlocal = atom->nlocal; int angles_allow = atom->avec->angles_allow; - // setup ring of procs - - int next = me + 1; - int prev = me -1; - if (next == nprocs) next = 0; - if (prev < 0) prev = nprocs - 1; - // ----------------------------------------------------- // allocate arrays for self (1d) and bond partners (2d) // max = max # of bond partners for owned atoms = 2nd dim of partner arrays @@ -755,6 +759,10 @@ void FixShake::find_clusters() memory->create(partner_shake,nlocal,max,"shake:partner_shake"); memory->create(partner_nshake,nlocal,max,"shake:partner_nshake"); + // setup atomIDs and procowner vectors in rendezvous decomposition + + atom_owners(); + // ----------------------------------------------------- // set npartner and partner_tag from special arrays // ----------------------------------------------------- @@ -778,86 +786,13 @@ void FixShake::find_clusters() } // ----------------------------------------------------- - // set partner_mask, partner_type, partner_massflag, partner_bondtype - // for bonded partners - // requires communication for off-proc partners + // set partner_mask, partner_type, partner_massflag, + // partner_bondtype for all my bonded partners + // requires rendezvous communication for off-proc partners // ----------------------------------------------------- - // fill in mask, type, massflag, bondtype if own bond partner - // info to store in buf for each off-proc bond = nper = 6 - // 2 atoms IDs in bond, space for mask, type, massflag, bondtype - // nbufmax = largest buffer needed to hold info from any proc - - int nper = 6; - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - partner_mask[i][j] = 0; - partner_type[i][j] = 0; - partner_massflag[i][j] = 0; - partner_bondtype[i][j] = 0; - - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { - partner_mask[i][j] = mask[m]; - partner_type[i][j] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; - else massone = mass[type[m]]; - partner_massflag[i][j] = masscheck(massone); - } - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - else { - n = bondtype_findset(m,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - } - } else nbuf += nper; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; - buf[size+2] = 0; - buf[size+3] = 0; - buf[size+4] = 0; - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) buf[size+5] = n; - else buf[size+5] = 0; - size += nper; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this); - - // store partner info returned to me - - m = 0; - while (m < size) { - i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_mask[i][j] = buf[m+2]; - partner_type[i][j] = buf[m+3]; - partner_massflag[i][j] = buf[m+4]; - partner_bondtype[i][j] = buf[m+5]; - m += nper; - } - - memory->destroy(buf); + partner_info(npartner,partner_tag,partner_mask,partner_type, + partner_massflag,partner_bondtype); // error check for unfilled partner info // if partner_type not set, is an error @@ -868,17 +803,18 @@ void FixShake::find_clusters() // else it's an error flag = 0; + int flag2 = 0; for (i = 0; i < nlocal; i++) for (j = 0; j < npartner[i]; j++) { - if (partner_type[i][j] == 0) flag = 1; + if (partner_type[i][j] == 0) flag++; if (!(mask[i] & groupbit)) continue; if (!(partner_mask[i][j] & groupbit)) continue; - if (partner_bondtype[i][j] == 0) flag = 1; + if (partner_bondtype[i][j] == 0) flag2++; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Did not find fix shake partner info"); - + // ----------------------------------------------------- // identify SHAKEable bonds // set nshake[i] = # of SHAKE bonds attached to atom i @@ -931,56 +867,11 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set partner_nshake for bonded partners - // requires communication for off-proc partners + // requires rendezvous communication for off-proc partners // ----------------------------------------------------- - // fill in partner_nshake if own bond partner - // info to store in buf for each off-proc bond = - // 2 atoms IDs in bond, space for nshake value - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; - else nbuf += 3; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; - size += 3; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this); - - // store partner info returned to me - - m = 0; - while (m < size) { - i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_nshake[i][j] = buf[m+2]; - m += 3; - } - - memory->destroy(buf); - + nshake_info(npartner,partner_tag,partner_nshake); + // ----------------------------------------------------- // error checks // no atom with nshake > 3 @@ -988,7 +879,7 @@ void FixShake::find_clusters() // ----------------------------------------------------- flag = 0; - for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1; + for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag++; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms"); @@ -996,7 +887,7 @@ void FixShake::find_clusters() for (i = 0; i < nlocal; i++) { if (nshake[i] <= 1) continue; for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; + if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag++; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake clusters are connected"); @@ -1067,60 +958,7 @@ void FixShake::find_clusters() // requires communication for off-proc atoms // ----------------------------------------------------- - // fill in shake arrays for each bond partner I own - // info to store in buf for each off-proc bond = - // all values from shake_flag, shake_atom, shake_type - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { - shake_flag[m] = shake_flag[i]; - shake_atom[m][0] = shake_atom[i][0]; - shake_atom[m][1] = shake_atom[i][1]; - shake_atom[m][2] = shake_atom[i][2]; - shake_atom[m][3] = shake_atom[i][3]; - shake_type[m][0] = shake_type[i][0]; - shake_type[m][1] = shake_type[i][1]; - shake_type[m][2] = shake_type[i][2]; - } else nbuf += 9; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = partner_tag[i][j]; - buf[size+1] = shake_flag[i]; - buf[size+2] = shake_atom[i][0]; - buf[size+3] = shake_atom[i][1]; - buf[size+4] = shake_atom[i][2]; - buf[size+5] = shake_atom[i][3]; - buf[size+6] = shake_type[i][0]; - buf[size+7] = shake_type[i][1]; - buf[size+8] = shake_type[i][2]; - size += 9; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this); - - memory->destroy(buf); + shake_info(npartner,partner_tag,partner_shake); // ----------------------------------------------------- // free local memory @@ -1199,98 +1037,549 @@ void FixShake::find_clusters() } /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner: - fill in mask and type and massflag - search for bond with 1st atom and fill in bondtype + setup atomIDs and procowner ------------------------------------------------------------------------- */ -void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr) +void FixShake::atom_owners() { - FixShake *fsptr = (FixShake *)ptr; - Atom *atom = fsptr->atom; + tagint *tag = atom->tag; + int nlocal = atom->nlocal; + + int *proclist; + memory->create(proclist,nlocal,"shake:proclist"); + IDRvous *idbuf = (IDRvous *) + memory->smalloc((bigint) nlocal*sizeof(IDRvous),"shake:idbuf"); + + // setup input buf to rendezvous comm + // input datums = pairs of bonded atoms + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + + for (int i = 0; i < nlocal; i++) { + proclist[i] = tag[i] % nprocs; + idbuf[i].me = me; + idbuf[i].atomID = tag[i]; + } + + // perform rendezvous operation + // each proc assigned every 1/Pth atom + + char *buf; + comm->rendezvous(nlocal,proclist, + (char *) idbuf,sizeof(IDRvous), + rendezvous_ids,buf,0,(void *) this); + + memory->destroy(proclist); + memory->sfree(idbuf); +} + +/* ---------------------------------------------------------------------- + setup partner_mask, partner_type, partner_massflag, partner_bondtype +------------------------------------------------------------------------- */ + +void FixShake::partner_info(int *npartner, tagint **partner_tag, + int **partner_mask, int **partner_type, + int **partner_massflag, int **partner_bondtype) +{ + int i,j,m,n; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PartnerInfo *inbuf = (PartnerInfo *) + memory->smalloc((bigint) nsend*sizeof(PartnerInfo),"special:inbuf"); + + // set values in 4 partner arrays for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = pair of bonded atoms where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) + // 4 values for my owned atom + double *rmass = atom->rmass; double *mass = atom->mass; - int *mask = atom->mask; int *type = atom->type; + int *mask = atom->mask; + tagint *tag = atom->tag; + + double massone; + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + partner_mask[i][j] = 0; + partner_type[i][j] = 0; + partner_massflag[i][j] = 0; + partner_bondtype[i][j] = 0; + + m = atom->map(partner_tag[i][j]); + + if (m >= 0 && m < nlocal) { + partner_mask[i][j] = mask[m]; + partner_type[i][j] = type[m]; + if (nmass) { + if (rmass) massone = rmass[m]; + else massone = mass[type[m]]; + partner_massflag[i][j] = masscheck(massone); + } + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + else { + n = bondtype_findset(m,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + } + + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].partnerID = tag[i]; + inbuf[nsend].mask = mask[i]; + inbuf[nsend].type = type[i]; + if (nmass) { + if (rmass) massone = rmass[i]; + else massone = mass[type[i]]; + inbuf[nsend].massflag = masscheck(massone); + } else inbuf[nsend].massflag = 0; + + // my atom may own bond, in which case set partner_bondtype + // else receiver of this datum will own the bond and return the value + + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) { + partner_bondtype[i][j] = n; + inbuf[nsend].bondtype = n; + } else inbuf[nsend].bondtype = 0; + + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned partner 4 values + + char *buf; + int nreturn = comm->rendezvous(nsend,proclist, + (char *) inbuf,sizeof(PartnerInfo), + rendezvous_partners_info,buf,sizeof(PartnerInfo), + (void *) this); + PartnerInfo *outbuf = (PartnerInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set partner 4 values for un-onwed partners based on output info + // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (j = 0; j < npartner[i]; j++) + if (partner_tag[i][j] == outbuf[m].partnerID) break; + partner_mask[i][j] = outbuf[m].mask; + partner_type[i][j] = outbuf[m].type; + partner_massflag[i][j] = outbuf[m].massflag; + + // only set partner_bondtype if my atom did not set it when setting up rendezvous + // if this proc set it, then sender of this datum set outbuf.bondtype = 0 + + if (partner_bondtype[i][j] == 0) + partner_bondtype[i][j] = outbuf[m].bondtype; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + setup partner_nshake +------------------------------------------------------------------------- */ + +void FixShake::nshake_info(int *npartner, tagint **partner_tag, + int **partner_nshake) +{ + int i,j,m,n; int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + NShakeInfo *inbuf = (NShakeInfo *) + memory->smalloc((bigint) nsend*sizeof(NShakeInfo),"special:inbuf"); + + // set partner_nshake for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = pair of bonded atoms where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) + // nshake value for my owned atom + + tagint *tag = atom->tag; + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + partner_nshake[i][j] = 0; + m = atom->map(partner_tag[i][j]); + if (m >= 0 && m < nlocal) { + partner_nshake[i][j] = nshake[m]; + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].partnerID = tag[i]; + inbuf[nsend].nshake = nshake[i]; + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned partner nshake + + char *buf; + int nreturn = comm->rendezvous(nsend,proclist, + (char *) inbuf,sizeof(NShakeInfo), + rendezvous_nshake,buf,sizeof(NShakeInfo), + (void *) this); + NShakeInfo *outbuf = (NShakeInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set partner nshake for un-onwed partners based on output info + // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (j = 0; j < npartner[i]; j++) + if (partner_tag[i][j] == outbuf[m].partnerID) break; + partner_nshake[i][j] = outbuf[m].nshake; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + setup shake_flag, shake_atom, shake_type +------------------------------------------------------------------------- */ + +void FixShake::shake_info(int *npartner, tagint **partner_tag, + int **partner_shake) +{ + int i,j,m,n; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + ShakeInfo *inbuf = (ShakeInfo *) + memory->smalloc((bigint) nsend*sizeof(ShakeInfo),"special:inbuf"); + + // set 3 shake arrays for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = partner atom where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc) + // values in 3 shake arrays + + nsend = 0; + for (i = 0; i < nlocal; i++) { + if (shake_flag[i] == 0) continue; + for (j = 0; j < npartner[i]; j++) { + if (partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); + + if (m >= 0 && m < nlocal) { + shake_flag[m] = shake_flag[i]; + shake_atom[m][0] = shake_atom[i][0]; + shake_atom[m][1] = shake_atom[i][1]; + shake_atom[m][2] = shake_atom[i][2]; + shake_atom[m][3] = shake_atom[i][3]; + shake_type[m][0] = shake_type[i][0]; + shake_type[m][1] = shake_type[i][1]; + shake_type[m][2] = shake_type[i][2]; + + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].shake_flag = shake_flag[i]; + inbuf[nsend].shake_atom[0] = shake_atom[i][0]; + inbuf[nsend].shake_atom[1] = shake_atom[i][1]; + inbuf[nsend].shake_atom[2] = shake_atom[i][2]; + inbuf[nsend].shake_atom[3] = shake_atom[i][3]; + inbuf[nsend].shake_type[0] = shake_type[i][0]; + inbuf[nsend].shake_type[1] = shake_type[i][1]; + inbuf[nsend].shake_type[2] = shake_type[i][2]; + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned shake info + + char *buf; + int nreturn = comm->rendezvous(nsend,proclist, + (char *) inbuf,sizeof(ShakeInfo), + rendezvous_shake,buf,sizeof(ShakeInfo), + (void *) this); + ShakeInfo *outbuf = (ShakeInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set shake info for un-onwed partners based on output info + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + shake_flag[i] = outbuf[m].shake_flag; + shake_atom[i][0] = outbuf[m].shake_atom[0]; + shake_atom[i][1] = outbuf[m].shake_atom[1]; + shake_atom[i][2] = outbuf[m].shake_atom[2]; + shake_atom[i][3] = outbuf[m].shake_atom[3]; + shake_type[i][0] = outbuf[m].shake_type[0]; + shake_type[i][1] = outbuf[m].shake_type[1]; + shake_type[i][2] = outbuf[m].shake_type[2]; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N IDRvous datums + no outbuf +------------------------------------------------------------------------- */ + +int FixShake::rendezvous_ids(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + FixShake *fsptr = (FixShake *) ptr; + Memory *memory = fsptr->memory; + + int *procowner; + tagint *atomIDs; + + memory->create(procowner,n,"special:procowner"); + memory->create(atomIDs,n,"special:atomIDs"); + + IDRvous *in = (IDRvous *) inbuf; + + for (int i = 0; i < n; i++) { + procowner[i] = in[i].me; + atomIDs[i] = in[i].atomID; + } + + // store rendezvous data in FixShake class + + fsptr->nrvous = n; + fsptr->procowner = procowner; + fsptr->atomIDs = atomIDs; + + // flag = 0: no 2nd irregular comm needed in comm->rendezvous + + flag = 0; + return 0; +} + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs +------------------------------------------------------------------------- */ + +int FixShake::rendezvous_partners_info(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + int i,m; + + FixShake *fsptr = (FixShake *) ptr; + Atom *atom = fsptr->atom; + Memory *memory = fsptr->memory; + + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts + + atom->map_clear(); + + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID = 4 values + + PartnerInfo *in = (PartnerInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + double massone; int nmass = fsptr->nmass; - tagint *buf = (tagint *) cbuf; - int m,n; - double massone; - - for (int i = 0; i < ndatum; i += 6) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) { - buf[i+2] = mask[m]; - buf[i+3] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; - else massone = mass[type[m]]; - buf[i+4] = fsptr->masscheck(massone); - } - if (buf[i+5] == 0) { - n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0); - if (n) buf[i+5] = n; - } - } + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf + + flag = 1; + return n; } /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner, fill in nshake value + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N NShakeInfo datums + outbuf = same list of N NShakeInfo datums, routed to different procs ------------------------------------------------------------------------- */ -void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr) +int FixShake::rendezvous_nshake(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) { - FixShake *fsptr = (FixShake *)ptr; + int i,j,m; + + FixShake *fsptr = (FixShake *) ptr; Atom *atom = fsptr->atom; - int nlocal = atom->nlocal; + Memory *memory = fsptr->memory; - int *nshake = fsptr->nshake; + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts - tagint *buf = (tagint *) cbuf; - int m; + atom->map_clear(); - for (int i = 0; i < ndatum; i += 3) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) buf[i+2] = nshake[m]; + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID + + NShakeInfo *in = (NShakeInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf + + flag = 1; + return n; } - /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner, fill in nshake value + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs ------------------------------------------------------------------------- */ -void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr) +int FixShake::rendezvous_shake(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) { - FixShake *fsptr = (FixShake *)ptr; + int i,j,m; + + FixShake *fsptr = (FixShake *) ptr; Atom *atom = fsptr->atom; - int nlocal = atom->nlocal; + Memory *memory = fsptr->memory; - int *shake_flag = fsptr->shake_flag; - tagint **shake_atom = fsptr->shake_atom; - int **shake_type = fsptr->shake_type; + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts - tagint *buf = (tagint *) cbuf; - int m; + atom->map_clear(); - for (int i = 0; i < ndatum; i += 9) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) { - shake_flag[m] = buf[i+1]; - shake_atom[m][0] = buf[i+2]; - shake_atom[m][1] = buf[i+3]; - shake_atom[m][2] = buf[i+4]; - shake_atom[m][3] = buf[i+5]; - shake_type[m][0] = buf[i+6]; - shake_type[m][1] = buf[i+7]; - shake_type[m][2] = buf[i+8]; - } + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID + + ShakeInfo *in = (ShakeInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf; + + flag = 1; + return n; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index d4e7b85ec4..2baea90a4a 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -120,6 +120,11 @@ class FixShake : public Fix { int nmol; void find_clusters(); + void atom_owners(); + void partner_info(int *, tagint **, int **, int **, int **, int **); + void nshake_info(int *, tagint **, int **); + void shake_info(int *, tagint **, int **); + int masscheck(double); void unconstrained_update(); void unconstrained_update_respa(int); @@ -131,12 +136,40 @@ class FixShake : public Fix { int bondtype_findset(int, tagint, tagint, int); int angletype_findset(int, tagint, tagint, int); - // static variable for ring communication callback to access class data - // callback functions for ring communication + // data used by rendezvous callback methods - static void ring_bonds(int, char *, void *); - static void ring_nshake(int, char *, void *); - static void ring_shake(int, char *, void *); + int nrvous; + tagint *atomIDs; + int *procowner; + + struct IDRvous { + int me; + tagint atomID; + }; + + struct PartnerInfo { + tagint atomID,partnerID; + int mask,type,massflag,bondtype; + }; + + struct NShakeInfo { + tagint atomID,partnerID; + int nshake; + }; + + struct ShakeInfo { + tagint atomID; + int shake_flag; + int shake_atom[4]; + int shake_type[3]; + }; + + // callback functions for rendezvous communication + + static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); + static int rendezvous_partners_info(int, char *, int &, int *&, char *&, void *); + static int rendezvous_nshake(int, char *, int &, int *&, char *&, void *); + static int rendezvous_shake(int, char *, int &, int *&, char *&, void *); }; } diff --git a/src/special.cpp b/src/special.cpp index 79d2f77e46..b0d5bc7dca 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -21,7 +21,6 @@ #include "modify.h" #include "fix.h" #include "accelerator_kokkos.h" -#include "hashlittle.h" #include "atom_masks.h" #include "memory.h" #include "error.h" @@ -177,25 +176,20 @@ void Special::atom_owners() // input datums = pairs of bonded atoms // owning proc for each datum = random hash of atomID // one datum for each owned atom: datum = owning proc, atomID - // one datum for each bond partner: datum = atomID, bond partner ID - // add inverted datum when netwon_bond on for (int i = 0; i < nlocal; i++) { - //proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; proclist[i] = tag[i] % nprocs; idbuf[i].me = me; idbuf[i].atomID = tag[i]; } // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onetwo lists + // each proc assigned every 1/Pth atom char *buf; comm->rendezvous(nlocal,proclist, - (char *) idbuf,sizeof(PairRvous), - rendezvous_ids,buf,sizeof(PairRvous), - (void *) this); + (char *) idbuf,sizeof(IDRvous), + rendezvous_ids,buf,0,(void *) this); memory->destroy(proclist); memory->sfree(idbuf); @@ -215,49 +209,45 @@ void Special::onetwo_build_newton() int **nspecial = atom->nspecial; int nlocal = atom->nlocal; - // ncount = # of my datums to send - // include nlocal datums with owner of each atom + // nsend = # of my datums to send - int ncount = 0; + int nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < num_bond[i]; j++) { m = atom->map(bond_atom[i][j]); - if (m < 0 || m >= nlocal) ncount++; + if (m < 0 || m >= nlocal) nsend++; } } int *proclist; - memory->create(proclist,ncount,"special:proclist"); + memory->create(proclist,nsend,"special:proclist"); PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm // input datums = pairs of bonded atoms - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each bond partner: datum = atomID, bond partner ID - // add inverted datum when netwon_bond on + // owning proc for each datum = atomID % nprocs + // one datum for each bond partner: bond partner ID, atomID - ncount = 0; + nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < num_bond[i]; j++) { m = atom->map(bond_atom[i][j]); if (m >= 0 && m < nlocal) continue; - proclist[ncount] = bond_atom[i][j] % nprocs; - inbuf[ncount].atomID = bond_atom[i][j]; - inbuf[ncount].partnerID = tag[i]; - ncount++; + proclist[nsend] = bond_atom[i][j] % nprocs; + inbuf[nsend].atomID = bond_atom[i][j]; + inbuf[nsend].partnerID = tag[i]; + nsend++; } } // perform rendezvous operation // each proc owns random subset of atoms - // receives all info to form and return their onetwo lists char *buf; - int nreturn = comm->rendezvous(ncount,proclist, + int nreturn = comm->rendezvous(nsend,proclist, (char *) inbuf,sizeof(PairRvous), - rendezvous_1234,buf,sizeof(PairRvous), + rendezvous_pairs,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; @@ -312,6 +302,28 @@ void Special::onetwo_build_newton() void Special::onetwo_build_newton_off() { + int i,j; + + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,num_bond[i]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + // nsend = # of my datums to send + // include nlocal datums with owner of each atom + + for (i = 0; i < nlocal; i++) { + nspecial[i][0] = num_bond[i]; + for (j = 0; j < num_bond[i]; j++) + onetwo[i][j] = bond_atom[i][j]; + } } /* ---------------------------------------------------------------------- @@ -327,21 +339,20 @@ void Special::onethree_build() int **nspecial = atom->nspecial; int nlocal = atom->nlocal; - // ncount = # of my datums to send - // include nlocal datums with owner of each atom + // nsend = # of my datums to send - int ncount = 0; + int nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < nspecial[i][0]; j++) { m = atom->map(onetwo[i][j]); - if (m < 0 || m >= nlocal) ncount += nspecial[i][0]-1; + if (m < 0 || m >= nlocal) nsend += nspecial[i][0]-1; } } int *proclist; - memory->create(proclist,ncount,"special:proclist"); + memory->create(proclist,nsend,"special:proclist"); PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm // input datums = all pairs of onetwo atoms (they are 1-3 neighbors) @@ -349,7 +360,7 @@ void Special::onethree_build() // one datum for each owned atom: datum = owning proc, atomID // one datum for each onetwo pair: datum = atomID1, atomID2 - ncount = 0; + nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < nspecial[i][0]; j++) { m = atom->map(onetwo[i][j]); @@ -357,10 +368,10 @@ void Special::onethree_build() proc = onetwo[i][j] % nprocs; for (k = 0; k < nspecial[i][0]; k++) { if (j == k) continue; - proclist[ncount] = proc; - inbuf[ncount].atomID = onetwo[i][j]; - inbuf[ncount].partnerID = onetwo[i][k]; - ncount++; + proclist[nsend] = proc; + inbuf[nsend].atomID = onetwo[i][j]; + inbuf[nsend].partnerID = onetwo[i][k]; + nsend++; } } } @@ -370,9 +381,9 @@ void Special::onethree_build() // receives all info to form and return their onethree lists char *buf; - int nreturn = comm->rendezvous(ncount,proclist, + int nreturn = comm->rendezvous(nsend,proclist, (char *) inbuf,sizeof(PairRvous), - rendezvous_1234,buf,sizeof(PairRvous), + rendezvous_pairs,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; @@ -434,21 +445,21 @@ void Special::onefour_build() int **nspecial = atom->nspecial; int nlocal = atom->nlocal; - // ncount = # of my datums to send + // nsend = # of my datums to send // include nlocal datums with owner of each atom - int ncount = 0; + int nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < nspecial[i][1]; j++) { m = atom->map(onethree[i][j]); - if (m < 0 || m >= nlocal) ncount += nspecial[i][0]; + if (m < 0 || m >= nlocal) nsend += nspecial[i][0]; } } int *proclist; - memory->create(proclist,ncount,"special:proclist"); + memory->create(proclist,nsend,"special:proclist"); PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm // input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors) @@ -456,17 +467,17 @@ void Special::onefour_build() // one datum for each owned atom: datum = owning proc, atomID // one datum for each onethree/onetwo pair: datum = atomID1, atomID2 - ncount = 0; + nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < nspecial[i][1]; j++) { m = atom->map(onethree[i][j]); if (m >= 0 && m < nlocal) continue; proc = onethree[i][j] % nprocs; for (k = 0; k < nspecial[i][0]; k++) { - proclist[ncount] = proc; - inbuf[ncount].atomID = onethree[i][j]; - inbuf[ncount].partnerID = onetwo[i][k]; - ncount++; + proclist[nsend] = proc; + inbuf[nsend].atomID = onethree[i][j]; + inbuf[nsend].partnerID = onetwo[i][k]; + nsend++; } } } @@ -476,9 +487,9 @@ void Special::onefour_build() // receives all info to form and return their onefour lists char *buf; - int nreturn = comm->rendezvous(ncount,proclist, + int nreturn = comm->rendezvous(nsend,proclist, (char *) inbuf,sizeof(PairRvous), - rendezvous_1234,buf,sizeof(PairRvous), + rendezvous_pairs,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; @@ -773,7 +784,7 @@ void Special::combine() void Special::angle_trim() { - int i,j,m,n,proc,index; + int i,j,k,m;; int *num_angle = atom->num_angle; int *num_dihedral = atom->num_dihedral; @@ -804,96 +815,89 @@ void Special::angle_trim() " %g = # of 1-3 neighbors before angle trim\n",allcount); } - // if angles or dihedrals are defined, - // flag each 1-3 neigh if it appears in an angle or dihedral + // if angles or dihedrals are defined + // rendezvous angle 1-3 and dihedral 1-3,2-4 pairs if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) { - // ncount = # of my datums to send in 3 parts for each owned atom - // proc owner, onethree list, angle end points - // angle end points are from angle list and 1-3 and 2-4 pairs in dihedrals - // latter is only for angles or dihedrlas where I own atom2 + // nsend = # of my datums to send + // latter is only for angles or dihedrlas where I own atom2 (newton bond off) - int ncount = nlocal; - for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]; + int nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < num_angle[i]; j++) { - index = atom->map(angle_atom2[i][j]); - if (index >= 0 && index < nlocal) ncount += 2; + if (tag[i] != angle_atom2[i][j]) continue; + m = atom->map(angle_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(angle_atom3[i][j]); + if (m < 0 || m >= nlocal) nsend++; } for (j = 0; j < num_dihedral[i]; j++) { - index = atom->map(dihedral_atom2[i][j]); - if (index >= 0 && index < nlocal) ncount += 4; + if (tag[i] != dihedral_atom2[i][j]) continue; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom3[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) nsend++; } } int *proclist; - memory->create(proclist,ncount,"special:proclist"); + memory->create(proclist,nsend,"special:proclist"); PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm - // one datum for each owned atom: datum = proc, atomID - // sent to owner of atomID - // one datum for each 1-4 partner: datum = atomID, ID - // sent to owner of atomID - // two datums for each dihedral 1-4 endatoms : datum = atomID, ID - // sent to owner of atomID - m = 0; + nsend = 0; for (i = 0; i < nlocal; i++) { - for (j = 0; j < nspecial[i][1]; j++) { - proclist[m] = proc; - //inbuf[m].me = -1; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = onethree[i][j]; - m++; - } - for (j = 0; j < num_angle[i]; j++) { - index = atom->map(angle_atom2[i][j]); - if (index < 0 || index >= nlocal) continue; + if (tag[i] != angle_atom2[i][j]) continue; - proclist[m] = hashlittle(&angle_atom1[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = angle_atom1[i][j]; - inbuf[m].partnerID = angle_atom3[i][j]; - m++; + m = atom->map(angle_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = angle_atom1[i][j] % nprocs; + inbuf[nsend].atomID = angle_atom1[i][j]; + inbuf[nsend].partnerID = angle_atom3[i][j]; + nsend++; + } - proclist[m] = hashlittle(&angle_atom3[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = angle_atom3[i][j]; - inbuf[m].partnerID = angle_atom1[i][j]; - m++; + m = atom->map(angle_atom3[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = angle_atom3[i][j] % nprocs; + inbuf[nsend].atomID = angle_atom3[i][j]; + inbuf[nsend].partnerID = angle_atom1[i][j]; + nsend++; + } } for (j = 0; j < num_dihedral[i]; j++) { - index = atom->map(dihedral_atom2[i][j]); - if (index < 0 || index >= nlocal) continue; + if (tag[i] != dihedral_atom2[i][j]) continue; - proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom1[i][j]; - inbuf[m].partnerID = dihedral_atom3[i][j]; - m++; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom1[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom1[i][j]; + inbuf[nsend].partnerID = dihedral_atom3[i][j]; + nsend++; + } - proclist[m] = hashlittle(&dihedral_atom2[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom2[i][j]; - inbuf[m].partnerID = dihedral_atom4[i][j]; - m++; + m = atom->map(dihedral_atom3[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom3[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom3[i][j]; + inbuf[nsend].partnerID = dihedral_atom1[i][j]; + nsend++; + } - proclist[m] = hashlittle(&dihedral_atom3[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom3[i][j]; - inbuf[m].partnerID = dihedral_atom1[i][j]; - m++; - - proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom4[i][j]; - inbuf[m].partnerID = dihedral_atom2[i][j]; - m++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom4[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom4[i][j]; + inbuf[nsend].partnerID = dihedral_atom2[i][j]; + nsend++; + } } } @@ -903,26 +907,112 @@ void Special::angle_trim() // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(ncount,proclist, + int nreturn = comm->rendezvous(nsend,proclist, (char *) inbuf,sizeof(PairRvous), - rendezvous_trim,buf,sizeof(PairRvous), + rendezvous_pairs,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); - // reset nspecial[1] and onethree for all owned atoms based on output info + // flag all onethree atoms to keep - for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][1]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + + int **flag; + memory->create(flag,nlocal,maxall,"special:flag"); + + for (i = 0; i < nlocal; i++) + for (j = 0; j < nspecial[i][1]; j++) + flag[i][j] = 0; + + // reset nspecial[1] and onethree for all owned atoms based on output info + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_angle[i]; j++) { + if (tag[i] != angle_atom2[i][j]) continue; + + m = atom->map(angle_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == angle_atom3[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(angle_atom3[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == angle_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + } + + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + + m = atom->map(dihedral_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom3[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom3[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom4[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom2[i][j]) { + flag[m][k] = 1; + break; + } + } + } + } for (m = 0; m < nreturn; m++) { i = atom->map(outbuf[m].atomID); - onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; + for (k = 0; k < nspecial[i][1]; k++) + if (onethree[i][k] == outbuf[m].partnerID) { + flag[i][k] = 1; + break; + } } - + memory->destroy(outbuf); + // use flag values to compress onefour list for each atom + + for (i = 0; i < nlocal; i++) { + j = 0; + while (j < nspecial[i][1]) { + if (flag[i][j] == 0) { + onethree[i][j] = onethree[i][nspecial[i][1]-1]; + flag[i][j] = flag[i][nspecial[i][1]-1]; + nspecial[i][1]--; + } else j++; + } + } + + memory->destroy(flag); + // if no angles or dihedrals are defined, delete all 1-3 neighs } else { @@ -952,7 +1042,7 @@ void Special::angle_trim() void Special::dihedral_trim() { - int i,j,m,n,proc,index; + int i,j,k,m; int *num_dihedral = atom->num_dihedral; tagint **dihedral_atom1 = atom->dihedral_atom1; @@ -978,68 +1068,51 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, rendezvous onefour list with dihedral 1-4 pairs + // if dihedrals are defined, rendezvous dihedral 1-4 pairs if (num_dihedral && atom->ndihedrals) { - // ncount = # of my datums to send in 3 parts for each owned atom - // onefour list, proc owner, dihedral end points - // latter is only for dihedrals where I own atom2 + // nsend = # of my datums to send + // latter is only for dihedrals where I own atom2 (newton bond off) - int ncount = nlocal; - for (i = 0; i < nlocal; i++) ncount += nspecial[i][2]; + int nsend = 0; for (i = 0; i < nlocal; i++) { for (j = 0; j < num_dihedral[i]; j++) { - index = atom->map(dihedral_atom2[i][j]); - if (index >= 0 && index < nlocal) ncount += 2; + if (tag[i] != dihedral_atom2[i][j]) continue; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) nsend++; } } int *proclist; - memory->create(proclist,ncount,"special:proclist"); + memory->create(proclist,nsend,"special:proclist"); PairRvous *inbuf = (PairRvous *) - memory->smalloc((bigint) ncount*sizeof(PairRvous),"special:inbuf"); - + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + // setup input buf to rendezvous comm - // one datum for each owned atom: datum = proc, atomID - // sent to owner of atomID - // one datum for each 1-4 partner: datum = atomID, ID - // sent to owner of atomID - // two datums for each dihedral 1-4 endatoms : datum = atomID, ID - // sent to owner of atomID - m = 0; + nsend = 0; for (i = 0; i < nlocal; i++) { - proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; - proclist[m] = proc; - //inbuf[m].me = me; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = 0; - m++; - - for (j = 0; j < nspecial[i][2]; j++) { - proclist[m] = proc; - //inbuf[m].me = -1; - inbuf[m].atomID = tag[i]; - inbuf[m].partnerID = onefour[i][j]; - m++; - } - for (j = 0; j < num_dihedral[i]; j++) { - index = atom->map(dihedral_atom2[i][j]); - if (index < 0 || index >= nlocal) continue; + if (tag[i] != dihedral_atom2[i][j]) continue; - proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom1[i][j]; - inbuf[m].partnerID = dihedral_atom4[i][j]; - m++; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom1[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom1[i][j]; + inbuf[nsend].partnerID = dihedral_atom4[i][j]; + nsend++; + } - proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; - //inbuf[m].me = -2; - inbuf[m].atomID = dihedral_atom4[i][j]; - inbuf[m].partnerID = dihedral_atom1[i][j]; - m++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom4[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom4[i][j]; + inbuf[nsend].partnerID = dihedral_atom1[i][j]; + nsend++; + } } } @@ -1049,26 +1122,81 @@ void Special::dihedral_trim() // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(ncount,proclist, + int nreturn = comm->rendezvous(nsend,proclist, (char *) inbuf,sizeof(PairRvous), - rendezvous_trim,buf,sizeof(PairRvous), + rendezvous_pairs,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); + // flag all onefour atoms to keep + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][2]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + + int **flag; + memory->create(flag,nlocal,maxall,"special:flag"); + + for (i = 0; i < nlocal; i++) + for (j = 0; j < nspecial[i][2]; j++) + flag[i][j] = 0; + // reset nspecial[2] and onefour for all owned atoms based on output info - for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + + m = atom->map(dihedral_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][2]; k++) + if (onefour[m][k] == dihedral_atom4[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom4[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][2]; k++) + if (onefour[m][k] == dihedral_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + } + } for (m = 0; m < nreturn; m++) { i = atom->map(outbuf[m].atomID); - onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; + for (k = 0; k < nspecial[i][2]; k++) + if (onefour[i][k] == outbuf[m].partnerID) { + flag[i][k] = 1; + break; + } } memory->destroy(outbuf); + // use flag values to compress onefour list for each atom + + for (i = 0; i < nlocal; i++) { + j = 0; + while (j < nspecial[i][2]) { + if (flag[i][j] == 0) { + onefour[i][j] = onefour[i][nspecial[i][2]-1]; + flag[i][j] = flag[i][nspecial[i][2]-1]; + nspecial[i][2]--; + } else j++; + } + } + + memory->destroy(flag); + // if no dihedrals are defined, delete all 1-4 neighs } else { @@ -1093,8 +1221,8 @@ void Special::dihedral_trim() /* ---------------------------------------------------------------------- process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N PairRvous datums - outbuf = empty + inbuf = list of N IDRvous datums + no outbuf ------------------------------------------------------------------------- */ int Special::rendezvous_ids(int n, char *inbuf, @@ -1109,7 +1237,6 @@ int Special::rendezvous_ids(int n, char *inbuf, memory->create(procowner,n,"special:procowner"); memory->create(atomIDs,n,"special:atomIDs"); - // NOTE: when to free these vectors IDRvous *in = (IDRvous *) inbuf; @@ -1120,13 +1247,12 @@ int Special::rendezvous_ids(int n, char *inbuf, // store rendezvous data in Special class - sptr->ncount = n; + sptr->nrvous = n; sptr->procowner = procowner; sptr->atomIDs = atomIDs; - proclist = NULL; - outbuf = NULL; - + // flag = 0: no 2nd irregular comm needed in comm->rendezvous + flag = 0; return 0; } @@ -1138,7 +1264,7 @@ int Special::rendezvous_ids(int n, char *inbuf, outbuf = same list of N PairRvous datums, routed to different procs ------------------------------------------------------------------------- */ -int Special::rendezvous_1234(int n, char *inbuf, +int Special::rendezvous_pairs(int n, char *inbuf, int &flag, int *&proclist, char *&outbuf, void *ptr) { @@ -1153,10 +1279,10 @@ int Special::rendezvous_1234(int n, char *inbuf, // hash atom IDs stored in rendezvous decomposition - int ncount = sptr->ncount; + int nrvous = sptr->nrvous; tagint *atomIDs = sptr->atomIDs; - for (int i = 0; i < ncount; i++) + for (int i = 0; i < nrvous; i++) atom->map_one(atomIDs[i],i); // proclist = owner of atomID in caller decomposition @@ -1172,7 +1298,6 @@ int Special::rendezvous_1234(int n, char *inbuf, } outbuf = inbuf; - // NOTE: set out = in flag // re-create atom map @@ -1180,148 +1305,12 @@ int Special::rendezvous_1234(int n, char *inbuf, atom->nghost = 0; atom->map_set(); + // flag = 1: outbuf = inbuf + flag = 1; return n; } -/* ---------------------------------------------------------------------- - process data for atoms assigned to me in rendezvous decomposition - inbuf = list of N PairRvous datums - create outbuf = list of Nout PairRvous datums -------------------------------------------------------------------------- */ - -int Special::rendezvous_trim(int n, char *inbuf, - int &flag, int *&proclist, char *&outbuf, - void *ptr) -{ - int i,j,m; - - /* - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - Memory *memory = sptr->memory; - - // clear atom map so it can be here as a hash table - // faster than an STL map for large atom counts - - atom->map_clear(); - - // initialize hash - // ncount = number of atoms assigned to me - // key = atom ID - // value = index into Ncount-length data structure - - PairRvous *in = (PairRvous *) inbuf; - //std::map hash; - tagint id; - - int ncount = 0; - for (i = 0; i < n; i++) - if (in[i].me >= 0) - //hash[in[i].atomID] = ncount++; - atom->map_one(in[i].atomID,ncount++); - - // procowner = caller proc that owns each atom - // atomID = ID of each rendezvous atom I own - // npartner = # of 1-3 partners for each atom I own - - int *procowner,*npartner; - tagint *atomID; - memory->create(procowner,ncount,"special:procowner"); - memory->create(atomID,ncount,"special:atomID"); - memory->create(npartner,ncount,"special:npartner"); - for (m = 0; m < ncount; m++) npartner[m] = 0; - - for (i = 0; i < n; i++) { - //m = hash.find(in[i].atomID)->second; - m = atom->map(in[i].atomID); - if (in[i].me >= 0) { - procowner[m] = in[i].me; - atomID[m] = in[i].atomID; - } else if (in[i].me == -1) npartner[m]++; - } - - int max = 0; - for (m = 0; m < ncount; m++) max = MAX(max,npartner[m]); - - // partner = list of 1-3 or 1-4 partners for each atom I own - - int **partner; - memory->create(partner,ncount,max,"special:partner"); - for (m = 0; m < ncount; m++) npartner[m] = 0; - - for (i = 0; i < n; i++) { - if (in[i].me >= 0 || in[i].me == -2) continue; - //m = hash.find(in[i].atomID)->second; - m = atom->map(in[i].atomID); - partner[m][npartner[m]++] = in[i].partnerID; - } - - // flag = 1 if partner is in an actual angle or in a dihedral - - int **flag; - memory->create(flag,ncount,max,"special:flag"); - - for (i = 0; i < ncount; i++) - for (j = 0; j < npartner[i]; j++) - flag[i][j] = 0; - - tagint actual; - for (i = 0; i < n; i++) { - if (in[i].me != -2) continue; - actual = in[i].partnerID; - //m = hash.find(in[i].atomID)->second; - m = atom->map(in[i].atomID); - for (j = 0; j < npartner[m]; j++) - if (partner[m][j] == actual) { - flag[m][j] = 1; - break; - } - } - - // pass list of PairRvous datums back to comm->rendezvous - - int nout = 0; - for (m = 0; m < ncount; m++) nout += npartner[m]; - - memory->create(proclist,nout,"special:proclist"); - PairRvous *out = (PairRvous *) - memory->smalloc((bigint) nout*sizeof(PairRvous),"special:out"); - - nout = 0; - for (m = 0; m < ncount; m++) - for (j = 0; j < npartner[m]; j++) { - if (flag[m][j] == 0) continue; - proclist[nout] = procowner[m]; - out[nout].atomID = atomID[m]; - out[nout].partnerID = partner[m][j]; - nout++; - } - - outbuf = (char *) out; - - // clean up - // Comm::rendezvous will delete proclist and out (outbuf) - - memory->destroy(procowner); - memory->destroy(atomID); - memory->destroy(npartner); - memory->destroy(partner); - memory->destroy(flag); - - // re-create atom map - - atom->map_init(0); - atom->nghost = 0; - atom->map_set(); - - */ - - //return nout; - flag = 2; - return 0; -} - /* ---------------------------------------------------------------------- allow fixes to alter special list currently, only fix drude does this diff --git a/src/special.h b/src/special.h index 772ba613ac..d02a8522f6 100644 --- a/src/special.h +++ b/src/special.h @@ -31,7 +31,7 @@ class Special : protected Pointers { // data used by rendezvous callback methods - int ncount; + int nrvous; tagint *atomIDs; int *procowner; @@ -44,6 +44,8 @@ class Special : protected Pointers { tagint atomID,partnerID; }; + // private methods + void atom_owners(); void onetwo_build_newton(); void onetwo_build_newton_off(); @@ -60,8 +62,7 @@ class Special : protected Pointers { // callback functions for rendezvous communication static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); - static int rendezvous_1234(int, char *, int &, int *&, char *&, void *); - static int rendezvous_trim(int, char *, int &, int *&, char *&, void *); + static int rendezvous_pairs(int, char *, int &, int *&, char *&, void *); }; } From fd8130859b9dc840af8d4b459c7de3d055be5c60 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 19 Dec 2018 17:40:35 -0700 Subject: [PATCH 06/12] fix a small memory leak in SHAKE setup --- src/RIGID/fix_shake.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 66c92d42c5..51121f0853 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -955,7 +955,7 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set shake_flag,shake_atom,shake_type for non-central atoms - // requires communication for off-proc atoms + // requires rendezvous communication for off-proc atoms // ----------------------------------------------------- shake_info(npartner,partner_tag,partner_shake); @@ -964,6 +964,9 @@ void FixShake::find_clusters() // free local memory // ----------------------------------------------------- + memory->destroy(atomIDs); + memory->destroy(procowner); + memory->destroy(npartner); memory->destroy(nshake); memory->destroy(partner_tag); @@ -1390,24 +1393,24 @@ int FixShake::rendezvous_ids(int n, char *inbuf, FixShake *fsptr = (FixShake *) ptr; Memory *memory = fsptr->memory; - int *procowner; tagint *atomIDs; + int *procowner; - memory->create(procowner,n,"special:procowner"); memory->create(atomIDs,n,"special:atomIDs"); + memory->create(procowner,n,"special:procowner"); IDRvous *in = (IDRvous *) inbuf; for (int i = 0; i < n; i++) { - procowner[i] = in[i].me; atomIDs[i] = in[i].atomID; + procowner[i] = in[i].me; } // store rendezvous data in FixShake class fsptr->nrvous = n; - fsptr->procowner = procowner; fsptr->atomIDs = atomIDs; + fsptr->procowner = procowner; // flag = 0: no 2nd irregular comm needed in comm->rendezvous From 9de0262155461bcaee5132df2c0719486812d5d3 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 23 Jan 2019 14:49:52 -0700 Subject: [PATCH 07/12] added rendezvous via all2all --- src/RIGID/fix_rigid_small.cpp | 5 +- src/RIGID/fix_shake.cpp | 30 ++-- src/comm.cpp | 318 +++++++++++++++++++++++++++++++--- src/comm.h | 12 +- src/irregular.cpp | 185 +++++++++++++++++++- src/irregular.h | 2 + src/special.cpp | 119 +++++++------ 7 files changed, 566 insertions(+), 105 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index e20c64487b..957438fc55 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -1576,8 +1576,9 @@ void FixRigidSmall::create_bodies(tagint *bodyID) // func = compute bbox of each body, find atom closest to geometric center char *buf; - int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), - rendezvous_body,buf,sizeof(OutRvous), + int nreturn = comm->rendezvous(1,ncount,(char *) inbuf,sizeof(InRvous), + 0,proclist, + rendezvous_body,0,buf,sizeof(OutRvous), (void *) this); OutRvous *outbuf = (OutRvous *) buf; diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 51121f0853..35153de839 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -1068,9 +1068,9 @@ void FixShake::atom_owners() // each proc assigned every 1/Pth atom char *buf; - comm->rendezvous(nlocal,proclist, - (char *) idbuf,sizeof(IDRvous), - rendezvous_ids,buf,0,(void *) this); + comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous), + 0,proclist, + rendezvous_ids,0,buf,0,(void *) this); memory->destroy(proclist); memory->sfree(idbuf); @@ -1174,9 +1174,10 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned partner 4 values char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PartnerInfo), - rendezvous_partners_info,buf,sizeof(PartnerInfo), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PartnerInfo), + 0,proclist, + rendezvous_partners_info, + 0,buf,sizeof(PartnerInfo), (void *) this); PartnerInfo *outbuf = (PartnerInfo *) buf; @@ -1194,7 +1195,8 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, partner_type[i][j] = outbuf[m].type; partner_massflag[i][j] = outbuf[m].massflag; - // only set partner_bondtype if my atom did not set it when setting up rendezvous + // only set partner_bondtype if my atom did not set it + // when setting up rendezvous // if this proc set it, then sender of this datum set outbuf.bondtype = 0 if (partner_bondtype[i][j] == 0) @@ -1261,9 +1263,9 @@ void FixShake::nshake_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned partner nshake char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(NShakeInfo), - rendezvous_nshake,buf,sizeof(NShakeInfo), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(NShakeInfo), + 0,proclist, + rendezvous_nshake,0,buf,sizeof(NShakeInfo), (void *) this); NShakeInfo *outbuf = (NShakeInfo *) buf; @@ -1354,9 +1356,9 @@ void FixShake::shake_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned shake info char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(ShakeInfo), - rendezvous_shake,buf,sizeof(ShakeInfo), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(ShakeInfo), + 0,proclist, + rendezvous_shake,0,buf,sizeof(ShakeInfo), (void *) this); ShakeInfo *outbuf = (ShakeInfo *) buf; @@ -1412,7 +1414,7 @@ int FixShake::rendezvous_ids(int n, char *inbuf, fsptr->atomIDs = atomIDs; fsptr->procowner = procowner; - // flag = 0: no 2nd irregular comm needed in comm->rendezvous + // flag = 0: no second comm needed in rendezvous flag = 0; return 0; diff --git a/src/comm.cpp b/src/comm.cpp index 9bdaf0798a..39d4311aa2 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -729,34 +729,78 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, /* ---------------------------------------------------------------------- rendezvous communication operation three stages: - first Irregular converts inbuf from caller decomp to rvous decomp + first comm sends inbuf from caller decomp to rvous decomp callback operates on data in rendevous decomp - last Irregular converts outbuf from rvous decomp back to caller decomp + second comm sends outbuf from rvous decomp back to caller decomp inputs: - n = # of input datums - proclist = proc that owns each input datum in rendezvous decomposition - inbuf = list of input datums - insize = size in bytes of each input datum + which = perform (0) irregular or (1) MPI_All2allv communication + n = # of datums in inbuf + inbuf = vector of input datums + insize = byte size of each input datum + inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc + procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc, callback = caller function to invoke in rendezvous decomposition + takes input datums, returns output datums + outorder = same as inorder, but for datums returned by callback() + ptr = pointer to caller class, passed to callback() outputs: nout = # of output datums (function return) - outbuf = list of output datums - outsize = size in bytes of each output datum + outbuf = vector of output datums + outsize = byte size of each output datum + callback inputs: + nrvous = # of rvous decomp datums in inbuf_rvous + inbuf_rvous = vector of rvous decomp input datums + ptr = pointer to caller class + callback outputs: + nrvous_out = # of rvous decomp output datums (function return) + flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous, + 2 for second comm with new outbuf_rvous + procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc + allocated + outbuf_rvous = vector of rvous decomp output datums + NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR + to avoid checked-for overflow in MPI_Alltoallv? ------------------------------------------------------------------------- */ -int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, - int (*callback)(int, char *, int &, int *&, char *&, void *), - char *&outbuf, int outsize, void *ptr) +int Comm:: +rendezvous(int which, int n, char *inbuf, int insize, + int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, int outsize, void *ptr) { - // comm inbuf from caller decomposition to rendezvous decomposition + int nout; + if (which == 0) + nout = rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr); + else + nout = rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr); + + return nout; +} + +/* ---------------------------------------------------------------------- */ + +int Comm:: +rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, + int outsize, void *ptr) +{ + // irregular comm of inbuf from caller decomp to rendezvous decomp + Irregular *irregular = new Irregular(lmp); - int n_rvous = irregular->create_data(n,proclist); // add sort - char *inbuf_rvous = (char *) memory->smalloc((bigint) n_rvous*insize, - "rendezvous:inbuf_rvous"); + int nrvous; + if (inorder) nrvous = irregular->create_data_grouped(n,procs); + else nrvous = irregular->create_data(n,procs); + + char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, + "rendezvous:inbuf"); irregular->exchange_data(inbuf,insize,inbuf_rvous); + bigint irregular1_bytes = 0; //irregular->irregular_bytes; irregular->destroy_data(); delete irregular; @@ -764,29 +808,253 @@ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, // callback() allocates/populates proclist_rvous and outbuf_rvous int flag; - int *proclist_rvous; + int *procs_rvous; char *outbuf_rvous; - - int nout_rvous = - callback(n_rvous,inbuf_rvous,flag,proclist_rvous,outbuf_rvous,ptr); + int nrvous_out = callback(nrvous,inbuf_rvous,flag, + procs_rvous,outbuf_rvous,ptr); if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous - if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular + if (flag == 0) return 0; // all nout_rvous are 0, no 2nd comm stage - // comm outbuf from rendezvous decomposition back to caller + // irregular comm of outbuf from rendezvous decomp back to caller decomp // caller will free outbuf irregular = new Irregular(lmp); - int nout = irregular->create_data(nout_rvous,proclist_rvous); - outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); + int nout; + if (outorder) + nout = irregular->create_data_grouped(nrvous_out,procs_rvous); + else nout = irregular->create_data(nrvous_out,procs_rvous); + + outbuf = (char *) memory->smalloc((bigint) nout*outsize, + "rendezvous:outbuf"); irregular->exchange_data(outbuf_rvous,outsize,outbuf); - + + bigint irregular2_bytes = 0; //irregular->irregular_bytes; irregular->destroy_data(); delete irregular; - memory->destroy(proclist_rvous); + + memory->destroy(procs_rvous); memory->sfree(outbuf_rvous); + // approximate memory tally + + bigint rvous_bytes = 0; + rvous_bytes += n*insize; // inbuf + rvous_bytes += nout*outsize; // outbuf + rvous_bytes += nrvous*insize; // inbuf_rvous + rvous_bytes += nrvous_out*outsize; // outbuf_rvous + rvous_bytes += nrvous_out*sizeof(int); // procs_rvous + rvous_bytes += MAX(irregular1_bytes,irregular2_bytes); // max of 2 comms + + // return number of output datums + + return nout; +} + +/* ---------------------------------------------------------------------- */ + +int Comm:: +rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, int outsize, void *ptr) +{ + int iproc; + bigint all2all1_bytes,all2all2_bytes; + int *sendcount,*sdispls,*recvcount,*rdispls; + int *procs_a2a; + bigint *offsets; + char *inbuf_a2a,*outbuf_a2a; + + // create procs and inbuf for All2all if necesary + + if (!inorder) { + memory->create(procs_a2a,nprocs,"rendezvous:procs"); + inbuf_a2a = (char *) memory->smalloc((bigint) n*insize, + "rendezvous:inbuf"); + memory->create(offsets,nprocs,"rendezvous:offsets"); + + for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; + for (int i = 0; i < n; i++) procs_a2a[procs[i]]++; + + offsets[0] = 0; + for (int i = 1; i < nprocs; i++) + offsets[i] = offsets[i-1] + insize*procs_a2a[i-1]; + + bigint offset = 0; + for (int i = 0; i < n; i++) { + iproc = procs[i]; + memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize); + offsets[iproc] += insize; + offset += insize; + } + + all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize; + + } else { + procs_a2a = procs; + inbuf_a2a = inbuf; + all2all1_bytes = 0; + } + + // create args for MPI_Alltoallv() on input data + + memory->create(sendcount,nprocs,"rendezvous:sendcount"); + memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); + + memory->create(recvcount,nprocs,"rendezvous:recvcount"); + MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); + + memory->create(sdispls,nprocs,"rendezvous:sdispls"); + memory->create(rdispls,nprocs,"rendezvous:rdispls"); + sdispls[0] = rdispls[0] = 0; + for (int i = 1; i < nprocs; i++) { + sdispls[i] = sdispls[i-1] + sendcount[i-1]; + rdispls[i] = rdispls[i-1] + recvcount[i-1]; + } + int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1]; + + // test for overflow of input data due to imbalance or insize + // means that individual sdispls or rdispls values overflow + + int overflow = 0; + if ((bigint) n*insize > MAXSMALLINT) overflow = 1; + if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1; + int overflowall; + MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); + if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a"); + + for (int i = 0; i < nprocs; i++) { + sendcount[i] *= insize; + sdispls[i] *= insize; + recvcount[i] *= insize; + rdispls[i] *= insize; + } + + // all2all comm of inbuf from caller decomp to rendezvous decomp + + char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, + "rendezvous:inbuf"); + + MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR, + inbuf_rvous,recvcount,rdispls,MPI_CHAR,world); + + if (!inorder) { + memory->destroy(procs_a2a); + memory->sfree(inbuf_a2a); + memory->destroy(offsets); + } + + // peform rendezvous computation via callback() + // callback() allocates/populates proclist_rvous and outbuf_rvous + + int flag; + int *procs_rvous; + char *outbuf_rvous; + + int nrvous_out = callback(nrvous,inbuf_rvous,flag, + procs_rvous,outbuf_rvous,ptr); + + if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous + if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular + + // create procs and outbuf for All2all if necesary + + if (!outorder) { + memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs"); + + outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize, + "rendezvous:outbuf"); + memory->create(offsets,nprocs,"rendezvous:offsets"); + + for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; + for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++; + + offsets[0] = 0; + for (int i = 1; i < nprocs; i++) + offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1]; + + bigint offset = 0; + for (int i = 0; i < nrvous_out; i++) { + iproc = procs_rvous[i]; + memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize); + offsets[iproc] += outsize; + offset += outsize; + } + + all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + + nrvous_out*outsize; + + } else { + procs_a2a = procs_rvous; + outbuf_a2a = outbuf_rvous; + all2all2_bytes = 0; + } + + // comm outbuf from rendezvous decomposition back to caller + + memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); + + MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); + + sdispls[0] = rdispls[0] = 0; + for (int i = 1; i < nprocs; i++) { + sdispls[i] = sdispls[i-1] + sendcount[i-1]; + rdispls[i] = rdispls[i-1] + recvcount[i-1]; + } + int nout = rdispls[nprocs-1] + recvcount[nprocs-1]; + + // test for overflow of outbuf due to imbalance or outsize + // means that individual sdispls or rdispls values overflow + + overflow = 0; + if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1; + if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1; + MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); + if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a"); + + for (int i = 0; i < nprocs; i++) { + sendcount[i] *= outsize; + sdispls[i] *= outsize; + recvcount[i] *= outsize; + rdispls[i] *= outsize; + } + + // all2all comm of outbuf from rendezvous decomp back to caller decomp + // caller will free outbuf + + outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); + + MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR, + outbuf,recvcount,rdispls,MPI_CHAR,world); + + memory->destroy(procs_rvous); + memory->sfree(outbuf_rvous); + + if (!outorder) { + memory->destroy(procs_a2a); + memory->sfree(outbuf_a2a); + memory->destroy(offsets); + } + + // clean up + + memory->destroy(sendcount); + memory->destroy(recvcount); + memory->destroy(sdispls); + memory->destroy(rdispls); + + // approximate memory tally + + bigint rvous_bytes = 0; + rvous_bytes += n*insize; // inbuf + rvous_bytes += nout*outsize; // outbuf + rvous_bytes += nrvous*insize; // inbuf_rvous + rvous_bytes += nrvous_out*outsize; // outbuf_rvous + rvous_bytes += nrvous_out*sizeof(int); // procs_rvous + rvous_bytes += 4*nprocs*sizeof(int); // all2all vectors + rvous_bytes += MAX(all2all1_bytes,all2all2_bytes); // reorder ops + // return number of datums return nout; diff --git a/src/comm.h b/src/comm.h index a1bac53ac8..807da9bf0d 100644 --- a/src/comm.h +++ b/src/comm.h @@ -109,9 +109,9 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); - int rendezvous(int, int *, char *, int, + int rendezvous(int, int, char *, int, int, int *, int (*)(int, char *, int &, int *&, char *&, void *), - char *&, int, void *); + int, char *&, int, void *); int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); @@ -146,6 +146,14 @@ class Comm : protected Pointers { int ncores; // # of cores per node int coregrid[3]; // 3d grid of cores within a node int user_coregrid[3]; // user request for cores in each dim + + int rendezvous_irregular(int, char *, int, int, int *, + int (*)(int, char *, int &, int *&, char *&, void *), + int, char *&, int, void *); + int rendezvous_all2all(int, char *, int, int, int *, + int (*)(int, char *, int &, int *&, char *&, void *), + int, char *&, int, void *); + public: enum{MULTIPLE}; }; diff --git a/src/irregular.cpp b/src/irregular.cpp index 60025249cf..77278ec4c5 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -622,6 +622,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag) num_send = new int[nsend_proc]; index_send = new int[n-work1[me]]; index_self = new int[work1[me]]; + maxindex = n; // proc_send = procs I send to // num_send = # of datums I send to each proc @@ -679,8 +680,182 @@ int Irregular::create_data(int n, int *proclist, int sortflag) // receive incoming messages // proc_recv = procs I recv from - // num_recv = total size of message each proc sends me - // nrecvdatum = total size of data I recv + // num_recv = # of datums each proc sends me + // nrecvdatum = total # of datums I recv + + int nrecvdatum = 0; + for (i = 0; i < nrecv_proc; i++) { + MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status); + proc_recv[i] = status->MPI_SOURCE; + nrecvdatum += num_recv[i]; + } + nrecvdatum += num_self; + + // sort proc_recv and num_recv by proc ID if requested + // useful for debugging to insure reproducible ordering of received datums + + if (sortflag) { + int *order = new int[nrecv_proc]; + int *proc_recv_ordered = new int[nrecv_proc]; + int *num_recv_ordered = new int[nrecv_proc]; + + for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_QSORT) + proc_recv_copy = proc_recv; + qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif + + int j; + for (i = 0; i < nrecv_proc; i++) { + j = order[i]; + proc_recv_ordered[i] = proc_recv[j]; + num_recv_ordered[i] = num_recv[j]; + } + + memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int)); + memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int)); + delete [] order; + delete [] proc_recv_ordered; + delete [] num_recv_ordered; + } + + // barrier to insure all MPI_ANY_SOURCE messages are received + // else another proc could proceed to exchange_data() and send to me + + MPI_Barrier(world); + + // return # of datums I will receive + + return nrecvdatum; +} + +/* ---------------------------------------------------------------------- + create communication plan based on list of datums of uniform size + n = # of datums to send + procs = how many datums to send to each proc, must include self + sort = flag for sorting order of received messages by proc ID + return total # of datums I will recv, including any to self +------------------------------------------------------------------------- */ + +int Irregular::create_data_grouped(int n, int *procs, int sortflag) +{ + int i,j,k,m; + + // setup for collective comm + // work1 = # of datums I send to each proc, set self to 0 + // work2 = 1 for all procs, used for ReduceScatter + + for (i = 0; i < nprocs; i++) { + work1[i] = procs[i]; + work2[i] = 1; + } + work1[me] = 0; + + // nrecv_proc = # of procs I receive messages from, not including self + // options for performing ReduceScatter operation + // some are more efficient on some machines at big sizes + +#ifdef LAMMPS_RS_ALLREDUCE_INPLACE + MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work1[me]; +#else +#ifdef LAMMPS_RS_ALLREDUCE + MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work2[me]; +#else + MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); +#endif +#endif + + // allocate receive arrays + + proc_recv = new int[nrecv_proc]; + num_recv = new int[nrecv_proc]; + request = new MPI_Request[nrecv_proc]; + status = new MPI_Status[nrecv_proc]; + + // work1 = # of datums I send to each proc, including self + // nsend_proc = # of procs I send messages to, not including self + + for (i = 0; i < nprocs; i++) work1[i] = procs[i]; + + nsend_proc = 0; + for (i = 0; i < nprocs; i++) + if (work1[i]) nsend_proc++; + if (work1[me]) nsend_proc--; + + // allocate send and self arrays + + proc_send = new int[nsend_proc]; + num_send = new int[nsend_proc]; + index_send = new int[n-work1[me]]; + index_self = new int[work1[me]]; + maxindex = n; + + // proc_send = procs I send to + // num_send = # of datums I send to each proc + // num_self = # of datums I copy to self + // to balance pattern of send messages: + // each proc begins with iproc > me, continues until iproc = me + // reset work1 to store which send message each proc corresponds to + + int iproc = me; + int isend = 0; + for (i = 0; i < nprocs; i++) { + iproc++; + if (iproc == nprocs) iproc = 0; + if (iproc == me) { + num_self = work1[iproc]; + work1[iproc] = 0; + } else if (work1[iproc] > 0) { + proc_send[isend] = iproc; + num_send[isend] = work1[iproc]; + work1[iproc] = isend; + isend++; + } + } + + // work2 = offsets into index_send for each proc I send to + // m = ptr into index_self + // index_send = list of which datums to send to each proc + // 1st N1 values are datum indices for 1st proc, + // next N2 values are datum indices for 2nd proc, etc + // index_self = list of which datums to copy to self + + work2[0] = 0; + for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1]; + + m = 0; + i = 0; + for (iproc = 0; iproc < nprocs; iproc++) { + k = procs[iproc]; + for (j = 0; j < k; j++) { + if (iproc == me) index_self[m++] = i++; + else { + isend = work1[iproc]; + index_send[work2[isend]++] = i++; + } + } + } + + // tell receivers how much data I send + // sendmax_proc = largest # of datums I send in a single message + + sendmax_proc = 0; + for (i = 0; i < nsend_proc; i++) { + MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock + MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq); + MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion + sendmax_proc = MAX(sendmax_proc,num_send[i]); + } + + // receive incoming messages + // proc_recv = procs I recv from + // num_recv = # of datums each proc sends me + // nrecvdatum = total # of datums I recv int nrecvdatum = 0; for (i = 0; i < nrecv_proc; i++) { @@ -789,6 +964,12 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) // wait on all incoming messages if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); + + // approximate memory tally + + bigint irregular_bytes = 2*nprocs*sizeof(int); + irregular_bytes += maxindex*sizeof(int); + irregular_bytes += maxbuf; } /* ---------------------------------------------------------------------- diff --git a/src/irregular.h b/src/irregular.h index 1f74fe801b..d56bcb253d 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -33,6 +33,7 @@ class Irregular : protected Pointers { int *procassign = NULL); int migrate_check(); int create_data(int, int *, int sortflag = 0); + int create_data_grouped(int, int *, int sortflag = 0); void exchange_data(char *, int, char *); void destroy_data(); bigint memory_usage(); @@ -48,6 +49,7 @@ class Irregular : protected Pointers { double *dbuf; // double buf for largest single atom send int maxbuf; // size of char buf in bytes char *buf; // char buf for largest single data send + int maxindex; // combined size of index_send + index_self int *mproclist,*msizes; // persistent vectors in migrate_atoms int maxlocal; // allocated size of mproclist and msizes diff --git a/src/special.cpp b/src/special.cpp index b0d5bc7dca..34685d8c65 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -172,10 +172,9 @@ void Special::atom_owners() IDRvous *idbuf = (IDRvous *) memory->smalloc((bigint) nlocal*sizeof(IDRvous),"special:idbuf"); - // setup input buf to rendezvous comm - // input datums = pairs of bonded atoms - // owning proc for each datum = random hash of atomID + // setup input buf for rendezvous comm // one datum for each owned atom: datum = owning proc, atomID + // each proc assigned every 1/Pth atom for (int i = 0; i < nlocal; i++) { proclist[i] = tag[i] % nprocs; @@ -184,19 +183,18 @@ void Special::atom_owners() } // perform rendezvous operation - // each proc assigned every 1/Pth atom char *buf; - comm->rendezvous(nlocal,proclist, - (char *) idbuf,sizeof(IDRvous), - rendezvous_ids,buf,0,(void *) this); + comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, + rendezvous_ids,0,buf,0,(void *) this); memory->destroy(proclist); memory->sfree(idbuf); } /* ---------------------------------------------------------------------- - onetwo build + onetwo build when newton_bond flag on + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::onetwo_build_newton() @@ -225,9 +223,8 @@ void Special::onetwo_build_newton() memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm - // input datums = pairs of bonded atoms - // owning proc for each datum = atomID % nprocs - // one datum for each bond partner: bond partner ID, atomID + // one datum for each unowned bond partner: bond partner ID, atomID + // owning proc for each datum = bond partner ID % nprocs nsend = 0; for (i = 0; i < nlocal; i++) { @@ -242,19 +239,19 @@ void Special::onetwo_build_newton() } // perform rendezvous operation - // each proc owns random subset of atoms char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PairRvous), - rendezvous_pairs,buf,sizeof(PairRvous), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); - // set nspecial[0] and onetwo for all owned atoms based on output info + // set nspecial[0] and onetwo for all owned atoms + // based on owned info plus rendezvous output info // output datums = pairs of atoms that are 1-2 neighbors for (i = 0; i < nlocal; i++) { @@ -327,8 +324,8 @@ void Special::onetwo_build_newton_off() } /* ---------------------------------------------------------------------- - onetwo build with newton_bond flag off - no need for rendezvous comm + onethree build + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::onethree_build() @@ -355,10 +352,10 @@ void Special::onethree_build() memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm - // input datums = all pairs of onetwo atoms (they are 1-3 neighbors) - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each onetwo pair: datum = atomID1, atomID2 + // datums = pairs of onetwo partners where either is unknown + // these pairs are onethree neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onetwo ID % nprocs nsend = 0; for (i = 0; i < nlocal; i++) { @@ -377,20 +374,19 @@ void Special::onethree_build() } // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onethree lists char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PairRvous), - rendezvous_pairs,buf,sizeof(PairRvous), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); - // set nspecial[1] and onethree for all owned atoms based on output info + // set nspecial[1] and onethree for all owned atoms + // based on owned info plus rendezvous output info // output datums = pairs of atoms that are 1-3 neighbors for (i = 0; i < nlocal; i++) { @@ -434,7 +430,8 @@ void Special::onethree_build() } /* ---------------------------------------------------------------------- - remove duplicates within each of onetwo, onethree, onefour individually + onefour build + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::onefour_build() @@ -446,7 +443,6 @@ void Special::onefour_build() int nlocal = atom->nlocal; // nsend = # of my datums to send - // include nlocal datums with owner of each atom int nsend = 0; for (i = 0; i < nlocal; i++) { @@ -462,10 +458,10 @@ void Special::onefour_build() memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm - // input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors) - // owning proc for each datum = random hash of atomID - // one datum for each owned atom: datum = owning proc, atomID - // one datum for each onethree/onetwo pair: datum = atomID1, atomID2 + // datums = pairs of onethree and onetwo partners where onethree is unknown + // these pairs are onefour neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onethree ID % nprocs nsend = 0; for (i = 0; i < nlocal; i++) { @@ -483,20 +479,19 @@ void Special::onefour_build() } // perform rendezvous operation - // each proc owns random subset of atoms - // receives all info to form and return their onefour lists char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PairRvous), - rendezvous_pairs,buf,sizeof(PairRvous), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); - // set nspecial[2] and onefour for all owned atoms based on output info + // set nspecial[2] and onefour for all owned atoms + // based on owned info plus rendezvous output info // output datums = pairs of atoms that are 1-4 neighbors for (i = 0; i < nlocal; i++) { @@ -780,6 +775,7 @@ void Special::combine() trim list of 1-3 neighbors by checking defined angles delete a 1-3 neigh if they are not end atoms of a defined angle and if they are not 1,3 or 2,4 atoms of a defined dihedral + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::angle_trim() @@ -849,6 +845,10 @@ void Special::angle_trim() memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm + // datums = pairs of onetwo partners where either is unknown + // these pairs are onethree neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onetwo ID % nprocs nsend = 0; for (i = 0; i < nlocal; i++) { @@ -902,14 +902,11 @@ void Special::angle_trim() } // perform rendezvous operation - // each proc owns random subset of atoms - // func = compute bbox of each body, flag atom closest to geometric center - // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PairRvous), - rendezvous_pairs,buf,sizeof(PairRvous), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; @@ -931,6 +928,8 @@ void Special::angle_trim() flag[i][j] = 0; // reset nspecial[1] and onethree for all owned atoms based on output info + // based on owned info plus rendezvous output info + // output datums = pairs of atoms that are 1-3 neighbors for (i = 0; i < nlocal; i++) { for (j = 0; j < num_angle[i]; j++) { @@ -1036,8 +1035,9 @@ void Special::angle_trim() } /* ---------------------------------------------------------------------- - trim list of 1-4 neighbors by checking defined dihedrals + trim list of 1-4 neighbors by checking all defined dihedrals delete a 1-4 neigh if they are not end atoms of a defined dihedral + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::dihedral_trim() @@ -1068,12 +1068,11 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, rendezvous dihedral 1-4 pairs + // if dihedrals are defined, rendezvous the dihedral 1-4 pairs if (num_dihedral && atom->ndihedrals) { // nsend = # of my datums to send - // latter is only for dihedrals where I own atom2 (newton bond off) int nsend = 0; for (i = 0; i < nlocal; i++) { @@ -1092,6 +1091,11 @@ void Special::dihedral_trim() memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); // setup input buf to rendezvous comm + // datums = pairs of onefour atom IDs in a dihedral defined for my atoms + // only dihedrals where I own atom2 (in case newton_bond off) + // datum = atom1 ID and atom4 ID + // send the datum twice, to owner of atom1 ID and atom4 ID + // owning procs for each datum = atom1 or atom4 ID % nprocs nsend = 0; for (i = 0; i < nlocal; i++) { @@ -1117,21 +1121,18 @@ void Special::dihedral_trim() } // perform rendezvous operation - // each proc owns random subset of atoms - // func = compute bbox of each body, flag atom closest to geometric center - // when done: each atom has atom ID of owning atom of its body char *buf; - int nreturn = comm->rendezvous(nsend,proclist, - (char *) inbuf,sizeof(PairRvous), - rendezvous_pairs,buf,sizeof(PairRvous), + int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); memory->sfree(inbuf); - // flag all onefour atoms to keep + // flag all of my onefour IDs to keep int max = 0; for (i = 0; i < nlocal; i++) @@ -1145,8 +1146,6 @@ void Special::dihedral_trim() for (j = 0; j < nspecial[i][2]; j++) flag[i][j] = 0; - // reset nspecial[2] and onefour for all owned atoms based on output info - for (i = 0; i < nlocal; i++) { for (j = 0; j < num_dihedral[i]; j++) { if (tag[i] != dihedral_atom2[i][j]) continue; @@ -1251,7 +1250,7 @@ int Special::rendezvous_ids(int n, char *inbuf, sptr->procowner = procowner; sptr->atomIDs = atomIDs; - // flag = 0: no 2nd irregular comm needed in comm->rendezvous + // flag = 0: no second comm needed in rendezvous flag = 0; return 0; @@ -1272,7 +1271,7 @@ int Special::rendezvous_pairs(int n, char *inbuf, Atom *atom = sptr->atom; Memory *memory = sptr->memory; - // clear atom map so it can be here as a hash table + // clear atom map so it can be used here as a hash table // faster than an STL map for large atom counts atom->map_clear(); From 4ce68cf5fd776bd52a07befb93de7f197be49b69 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 23 Jan 2019 16:01:10 -0700 Subject: [PATCH 08/12] added diagnostic info for memory and balance info --- src/RIGID/fix_shake.cpp | 8 +- src/comm.cpp | 185 ++++++++++++++++++++++++++++++++++++---- src/comm.h | 6 +- src/irregular.cpp | 7 +- src/special.cpp | 12 +-- 5 files changed, 184 insertions(+), 34 deletions(-) diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 35153de839..46e478064c 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -1070,7 +1070,7 @@ void FixShake::atom_owners() char *buf; comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous), 0,proclist, - rendezvous_ids,0,buf,0,(void *) this); + rendezvous_ids,0,buf,0,(void *) this,1); memory->destroy(proclist); memory->sfree(idbuf); @@ -1178,7 +1178,7 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, 0,proclist, rendezvous_partners_info, 0,buf,sizeof(PartnerInfo), - (void *) this); + (void *) this,1); PartnerInfo *outbuf = (PartnerInfo *) buf; memory->destroy(proclist); @@ -1266,7 +1266,7 @@ void FixShake::nshake_info(int *npartner, tagint **partner_tag, int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(NShakeInfo), 0,proclist, rendezvous_nshake,0,buf,sizeof(NShakeInfo), - (void *) this); + (void *) this,1); NShakeInfo *outbuf = (NShakeInfo *) buf; memory->destroy(proclist); @@ -1359,7 +1359,7 @@ void FixShake::shake_info(int *npartner, tagint **partner_tag, int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(ShakeInfo), 0,proclist, rendezvous_shake,0,buf,sizeof(ShakeInfo), - (void *) this); + (void *) this,1); ShakeInfo *outbuf = (ShakeInfo *) buf; memory->destroy(proclist); diff --git a/src/comm.cpp b/src/comm.cpp index 39d4311aa2..54476078be 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -766,18 +766,14 @@ int Comm:: rendezvous(int which, int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), - int outorder, char *&outbuf, int outsize, void *ptr) + int outorder, char *&outbuf, int outsize, void *ptr, int statflag) { - int nout; - if (which == 0) - nout = rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, - outorder,outbuf,outsize,ptr); + return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr,statflag); else - nout = rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, - outorder,outbuf,outsize,ptr); - - return nout; + return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr,statflag); } /* ---------------------------------------------------------------------- */ @@ -786,7 +782,7 @@ int Comm:: rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), int outorder, char *&outbuf, - int outsize, void *ptr) + int outsize, void *ptr, int statflag) { // irregular comm of inbuf from caller decomp to rendezvous decomp @@ -837,8 +833,82 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, memory->destroy(procs_rvous); memory->sfree(outbuf_rvous); - // approximate memory tally + // return number of output datums + if (!statflag) return nout; + + // memory info for caller and rendezvous decompositions + + bigint size_in_all,size_in_max,size_in_min; + bigint size_out_all,size_out_max,size_out_min; + bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; + bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; + + bigint size = (bigint) n*insize; + MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nout*outsize; + MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous*insize; + MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous_out*insize; + MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + int mbytes = 1024*1024; + + if (me == 0) { + if (screen) { + fprintf(screen,"Rendezvous balance and memory info:\n"); + fprintf(screen," input datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_in_all/insize,1.0*size_in_all/nprocs/insize, + size_in_max/insize,size_in_min/insize); + fprintf(screen," input data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, + 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); + fprintf(screen," output datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_out_all/outsize,1.0*size_out_all/nprocs/outsize, + size_out_max/outsize,size_out_min/outsize); + fprintf(screen," output data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, + 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); + fprintf(screen," input rvous datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, + size_inrvous_max/insize,size_inrvous_min/insize); + fprintf(screen," input rvous data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, + 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); + fprintf(screen," output rvous datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, + size_outrvous_max/outsize,size_outrvous_min/outsize); + fprintf(screen," output rvous data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, + 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); + } + } + + /* bigint rvous_bytes = 0; rvous_bytes += n*insize; // inbuf rvous_bytes += nout*outsize; // outbuf @@ -846,8 +916,7 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, rvous_bytes += nrvous_out*outsize; // outbuf_rvous rvous_bytes += nrvous_out*sizeof(int); // procs_rvous rvous_bytes += MAX(irregular1_bytes,irregular2_bytes); // max of 2 comms - - // return number of output datums + */ return nout; } @@ -857,7 +926,8 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, int Comm:: rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), - int outorder, char *&outbuf, int outsize, void *ptr) + int outorder, char *&outbuf, int outsize, void *ptr, + int statflag) { int iproc; bigint all2all1_bytes,all2all2_bytes; @@ -956,7 +1026,13 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, procs_rvous,outbuf_rvous,ptr); if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous - if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular + if (flag == 0) { + memory->destroy(sendcount); + memory->destroy(recvcount); + memory->destroy(sdispls); + memory->destroy(rdispls); + return 0; // all nout_rvous are 0, no 2nd irregular + } // create procs and outbuf for All2all if necesary @@ -1044,8 +1120,82 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, memory->destroy(sdispls); memory->destroy(rdispls); - // approximate memory tally + // return number of output datums + if (!statflag) return nout; + + // memory info for caller and rendezvous decompositions + + bigint size_in_all,size_in_max,size_in_min; + bigint size_out_all,size_out_max,size_out_min; + bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; + bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; + + bigint size = (bigint) n*insize; + MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nout*outsize; + MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous*insize; + MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous_out*insize; + MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + int mbytes = 1024*1024; + + if (me == 0) { + if (screen) { + fprintf(screen,"Rendezvous balance and memory info:\n"); + fprintf(screen," input datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_in_all/insize,1.0*size_in_all/nprocs/insize, + size_in_max/insize,size_in_min/insize); + fprintf(screen," input data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, + 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); + fprintf(screen," output datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_out_all/outsize,1.0*size_out_all/nprocs/outsize, + size_out_max/outsize,size_out_min/outsize); + fprintf(screen," output data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, + 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); + fprintf(screen," input rvous datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, + size_inrvous_max/insize,size_inrvous_min/insize); + fprintf(screen," input rvous data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, + 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); + fprintf(screen," output rvous datum count " + "(tot,ave,max,min): " BIGINT_FORMAT " %g " + BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, + size_outrvous_max/outsize,size_outrvous_min/outsize); + fprintf(screen," output rvous data (MB) " + "(tot,ave,max,min): %g %g %g %g\n", + 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, + 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); + } + } + + /* bigint rvous_bytes = 0; rvous_bytes += n*insize; // inbuf rvous_bytes += nout*outsize; // outbuf @@ -1054,8 +1204,7 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, rvous_bytes += nrvous_out*sizeof(int); // procs_rvous rvous_bytes += 4*nprocs*sizeof(int); // all2all vectors rvous_bytes += MAX(all2all1_bytes,all2all2_bytes); // reorder ops - - // return number of datums + */ return nout; } diff --git a/src/comm.h b/src/comm.h index 807da9bf0d..89889e3ebe 100644 --- a/src/comm.h +++ b/src/comm.h @@ -111,7 +111,7 @@ class Comm : protected Pointers { void *, void *, int self = 1); int rendezvous(int, int, char *, int, int, int *, int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *); + int, char *&, int, void *, int statflag=0); int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); @@ -149,10 +149,10 @@ class Comm : protected Pointers { int rendezvous_irregular(int, char *, int, int, int *, int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *); + int, char *&, int, void *, int); int rendezvous_all2all(int, char *, int, int, int *, int (*)(int, char *, int &, int *&, char *&, void *), - int, char *&, int, void *); + int, char *&, int, void *, int); public: enum{MULTIPLE}; diff --git a/src/irregular.cpp b/src/irregular.cpp index 77278ec4c5..c27a8c8e18 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -966,10 +966,11 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); // approximate memory tally + // DEBUG lines - bigint irregular_bytes = 2*nprocs*sizeof(int); - irregular_bytes += maxindex*sizeof(int); - irregular_bytes += maxbuf; + //bigint irregular_bytes = 2*nprocs*sizeof(int); + //irregular_bytes += maxindex*sizeof(int); + //irregular_bytes += maxbuf; } /* ---------------------------------------------------------------------- diff --git a/src/special.cpp b/src/special.cpp index 34685d8c65..5e0f865488 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -186,7 +186,7 @@ void Special::atom_owners() char *buf; comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, - rendezvous_ids,0,buf,0,(void *) this); + rendezvous_ids,0,buf,0,(void *) this,1); memory->destroy(proclist); memory->sfree(idbuf); @@ -244,7 +244,7 @@ void Special::onetwo_build_newton() int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); + (void *) this,1); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -379,7 +379,7 @@ void Special::onethree_build() int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); + (void *) this,1); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -484,7 +484,7 @@ void Special::onefour_build() int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); + (void *) this,1); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -907,7 +907,7 @@ void Special::angle_trim() int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); + (void *) this,1); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -1126,7 +1126,7 @@ void Special::dihedral_trim() int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this); + (void *) this,1); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); From a278df586d72850c71a318b550d8f3f9de69e5f0 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Thu, 24 Jan 2019 09:56:21 -0700 Subject: [PATCH 09/12] cleanup up rendezvous diagnostic output --- src/RIGID/fix_rigid_small.cpp | 19 +++- src/RIGID/fix_shake.cpp | 10 ++- src/comm.cpp | 163 +++++++++------------------------- src/comm.h | 1 + src/special.cpp | 14 +-- 5 files changed, 73 insertions(+), 134 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 957438fc55..bee5570316 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -28,6 +28,7 @@ #include "modify.h" #include "group.h" #include "comm.h" +#include "neighbor.h" #include "force.h" #include "input.h" #include "output.h" @@ -44,6 +45,8 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; +#define RVOUS 1 // 0 for irregular, 1 for all2all + #define MAXLINE 1024 #define CHUNK 1024 #define ATTRIBUTE_PERBODY 20 @@ -585,12 +588,22 @@ void FixRigidSmall::init() if (rflag && (modify->fmask[i] & POST_FORCE) && !modify->fix[i]->rigid_flag) { char str[128]; - snprintf(str,128,"Fix %s alters forces after fix rigid",modify->fix[i]->id); + snprintf(str,128,"Fix %s alters forces after fix rigid", + modify->fix[i]->id); error->warning(FLERR,str); } } } + // error if maxextent > comm->cutghost + // NOTE: could just warn if an override flag set + // NOTE: this could fail for comm multi mode if user sets a wrong cutoff + // for atom types in rigid bodies - need a more careful test + + double cutghost = MAX(neighbor->cutneighmax,comm->cutghostuser); + if (maxextent > cutghost) + error->all(FLERR,"Rigid body extent > ghost cutoff - use comm_modify cutoff"); + // error if npt,nph fix comes before rigid fix for (i = 0; i < modify->nfix; i++) { @@ -1576,10 +1589,10 @@ void FixRigidSmall::create_bodies(tagint *bodyID) // func = compute bbox of each body, find atom closest to geometric center char *buf; - int nreturn = comm->rendezvous(1,ncount,(char *) inbuf,sizeof(InRvous), + int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous), 0,proclist, rendezvous_body,0,buf,sizeof(OutRvous), - (void *) this); + (void *) this,1); OutRvous *outbuf = (OutRvous *) buf; memory->destroy(proclist); diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 46e478064c..2d3244885d 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -39,6 +39,8 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; +#define RVOUS 1 // 0 for irregular, 1 for all2all + #define BIG 1.0e20 #define MASSDELTA 0.1 @@ -1068,7 +1070,7 @@ void FixShake::atom_owners() // each proc assigned every 1/Pth atom char *buf; - comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous), + comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous), 0,proclist, rendezvous_ids,0,buf,0,(void *) this,1); @@ -1174,7 +1176,7 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned partner 4 values char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PartnerInfo), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PartnerInfo), 0,proclist, rendezvous_partners_info, 0,buf,sizeof(PartnerInfo), @@ -1263,7 +1265,7 @@ void FixShake::nshake_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned partner nshake char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(NShakeInfo), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo), 0,proclist, rendezvous_nshake,0,buf,sizeof(NShakeInfo), (void *) this,1); @@ -1356,7 +1358,7 @@ void FixShake::shake_info(int *npartner, tagint **partner_tag, // receives all data needed to populate un-owned shake info char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(ShakeInfo), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo), 0,proclist, rendezvous_shake,0,buf,sizeof(ShakeInfo), (void *) this,1); diff --git a/src/comm.cpp b/src/comm.cpp index 54476078be..e8a796036c 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -796,7 +796,7 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, "rendezvous:inbuf"); irregular->exchange_data(inbuf,insize,inbuf_rvous); - bigint irregular1_bytes = 0; //irregular->irregular_bytes; + bigint irregular1_bytes = irregular->memory_usage(); irregular->destroy_data(); delete irregular; @@ -826,7 +826,7 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, "rendezvous:outbuf"); irregular->exchange_data(outbuf_rvous,outsize,outbuf); - bigint irregular2_bytes = 0; //irregular->irregular_bytes; + bigint irregular2_bytes = irregular->memory_usage(); irregular->destroy_data(); delete irregular; @@ -834,90 +834,11 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, memory->sfree(outbuf_rvous); // return number of output datums + // last arg to stats() = memory for procs_rvous + irregular comm - if (!statflag) return nout; - - // memory info for caller and rendezvous decompositions - - bigint size_in_all,size_in_max,size_in_min; - bigint size_out_all,size_out_max,size_out_min; - bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; - bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; - - bigint size = (bigint) n*insize; - MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nout*outsize; - MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nrvous*insize; - MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - size = (bigint) nrvous_out*insize; - MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); - MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); - MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); - - int mbytes = 1024*1024; - - if (me == 0) { - if (screen) { - fprintf(screen,"Rendezvous balance and memory info:\n"); - fprintf(screen," input datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_in_all/insize,1.0*size_in_all/nprocs/insize, - size_in_max/insize,size_in_min/insize); - fprintf(screen," input data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", - 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, - 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); - fprintf(screen," output datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_out_all/outsize,1.0*size_out_all/nprocs/outsize, - size_out_max/outsize,size_out_min/outsize); - fprintf(screen," output data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", - 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, - 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); - fprintf(screen," input rvous datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, - size_inrvous_max/insize,size_inrvous_min/insize); - fprintf(screen," input rvous data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", - 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, - 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); - fprintf(screen," output rvous datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, - size_outrvous_max/outsize,size_outrvous_min/outsize); - fprintf(screen," output rvous data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", - 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, - 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); - } - } - - /* - bigint rvous_bytes = 0; - rvous_bytes += n*insize; // inbuf - rvous_bytes += nout*outsize; // outbuf - rvous_bytes += nrvous*insize; // inbuf_rvous - rvous_bytes += nrvous_out*outsize; // outbuf_rvous - rvous_bytes += nrvous_out*sizeof(int); // procs_rvous - rvous_bytes += MAX(irregular1_bytes,irregular2_bytes); // max of 2 comms - */ - + if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + MAX(irregular1_bytes,irregular2_bytes)); return nout; } @@ -1121,15 +1042,28 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, memory->destroy(rdispls); // return number of output datums + // last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops - if (!statflag) return nout; + if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + 4*nprocs*sizeof(int) + + MAX(all2all1_bytes,all2all2_bytes)); + return nout; +} - // memory info for caller and rendezvous decompositions +/* ---------------------------------------------------------------------- + print balance and memory info for rendezvous operation + useful for debugging +------------------------------------------------------------------------- */ +void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, + int insize, int outsize, bigint commsize) +{ bigint size_in_all,size_in_max,size_in_min; bigint size_out_all,size_out_max,size_out_min; bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; + bigint size_comm_all,size_comm_max,size_comm_min; bigint size = (bigint) n*insize; MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); @@ -1151,62 +1085,49 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + size = commsize; + MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + int mbytes = 1024*1024; if (me == 0) { if (screen) { - fprintf(screen,"Rendezvous balance and memory info:\n"); - fprintf(screen," input datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", + fprintf(screen,"Rendezvous balance and memory info: (tot,ave,max,min) \n"); + fprintf(screen," input datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", size_in_all/insize,1.0*size_in_all/nprocs/insize, size_in_max/insize,size_in_min/insize); - fprintf(screen," input data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", + fprintf(screen," input data (MB): %g %g %g %g\n", 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); - fprintf(screen," output datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", + fprintf(screen," output datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", size_out_all/outsize,1.0*size_out_all/nprocs/outsize, size_out_max/outsize,size_out_min/outsize); - fprintf(screen," output data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", + fprintf(screen," output data (MB): %g %g %g %g\n", 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); - fprintf(screen," input rvous datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", + fprintf(screen," input rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, size_inrvous_max/insize,size_inrvous_min/insize); - fprintf(screen," input rvous data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", + fprintf(screen," input rvous data (MB): %g %g %g %g\n", 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); - fprintf(screen," output rvous datum count " - "(tot,ave,max,min): " BIGINT_FORMAT " %g " - BIGINT_FORMAT " " BIGINT_FORMAT "\n", + fprintf(screen," output rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, size_outrvous_max/outsize,size_outrvous_min/outsize); - fprintf(screen," output rvous data (MB) " - "(tot,ave,max,min): %g %g %g %g\n", + fprintf(screen," output rvous data (MB): %g %g %g %g\n", 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); + fprintf(screen," rvous comm (MB): %g %g %g %g\n", + 1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes, + 1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes); } } - - /* - bigint rvous_bytes = 0; - rvous_bytes += n*insize; // inbuf - rvous_bytes += nout*outsize; // outbuf - rvous_bytes += nrvous*insize; // inbuf_rvous - rvous_bytes += nrvous_out*outsize; // outbuf_rvous - rvous_bytes += nrvous_out*sizeof(int); // procs_rvous - rvous_bytes += 4*nprocs*sizeof(int); // all2all vectors - rvous_bytes += MAX(all2all1_bytes,all2all2_bytes); // reorder ops - */ - - return nout; } /* ---------------------------------------------------------------------- diff --git a/src/comm.h b/src/comm.h index 89889e3ebe..9c0112b4c4 100644 --- a/src/comm.h +++ b/src/comm.h @@ -153,6 +153,7 @@ class Comm : protected Pointers { int rendezvous_all2all(int, char *, int, int, int *, int (*)(int, char *, int &, int *&, char *&, void *), int, char *&, int, void *, int); + void rendezvous_stats(int, int, int, int, int, int, bigint); public: enum{MULTIPLE}; diff --git a/src/special.cpp b/src/special.cpp index 5e0f865488..a0739d6ccc 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -27,6 +27,8 @@ using namespace LAMMPS_NS; +#define RVOUS 1 // 0 for irregular, 1 for all2all + /* ---------------------------------------------------------------------- */ Special::Special(LAMMPS *lmp) : Pointers(lmp) @@ -185,7 +187,7 @@ void Special::atom_owners() // perform rendezvous operation char *buf; - comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, + comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, rendezvous_ids,0,buf,0,(void *) this,1); memory->destroy(proclist); @@ -241,7 +243,7 @@ void Special::onetwo_build_newton() // perform rendezvous operation char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this,1); @@ -376,7 +378,7 @@ void Special::onethree_build() // perform rendezvous operation char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this,1); @@ -481,7 +483,7 @@ void Special::onefour_build() // perform rendezvous operation char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this,1); @@ -904,7 +906,7 @@ void Special::angle_trim() // perform rendezvous operation char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this,1); @@ -1123,7 +1125,7 @@ void Special::dihedral_trim() // perform rendezvous operation char *buf; - int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous), + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), (void *) this,1); From e04ab5197032679d563d3f15cc7931bdf5b98960 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 25 Mar 2019 10:20:32 -0600 Subject: [PATCH 10/12] bug fix for new fix shake redezvous comm --- src/RIGID/fix_shake.h | 2 +- src/comm.cpp | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index 2baea90a4a..b99a35f958 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -159,8 +159,8 @@ class FixShake : public Fix { struct ShakeInfo { tagint atomID; + tagint shake_atom[4]; int shake_flag; - int shake_atom[4]; int shake_type[3]; }; diff --git a/src/comm.cpp b/src/comm.cpp index e8a796036c..729f96581a 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -810,7 +810,12 @@ rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, procs_rvous,outbuf_rvous,ptr); if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous - if (flag == 0) return 0; // all nout_rvous are 0, no 2nd comm stage + if (flag == 0) { + if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + irregular1_bytes); + return 0; // all nout_rvous are 0, no 2nd comm stage + } // irregular comm of outbuf from rendezvous decomp back to caller decomp // caller will free outbuf @@ -952,9 +957,17 @@ rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, memory->destroy(recvcount); memory->destroy(sdispls); memory->destroy(rdispls); + if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + 4*nprocs*sizeof(int) + all2all1_bytes); return 0; // all nout_rvous are 0, no 2nd irregular } + + + + + // create procs and outbuf for All2all if necesary if (!outorder) { @@ -1102,10 +1115,15 @@ void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, fprintf(screen," input data (MB): %g %g %g %g\n", 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); - fprintf(screen," output datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_out_all/outsize,1.0*size_out_all/nprocs/outsize, - size_out_max/outsize,size_out_min/outsize); + if (outsize) + fprintf(screen," output datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_out_all/outsize,1.0*size_out_all/nprocs/outsize, + size_out_max/outsize,size_out_min/outsize); + else + fprintf(screen," output datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + 0,0.0,0,0); fprintf(screen," output data (MB): %g %g %g %g\n", 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); @@ -1116,10 +1134,15 @@ void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, fprintf(screen," input rvous data (MB): %g %g %g %g\n", 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); - fprintf(screen," output rvous datum count: " - BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", - size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, - size_outrvous_max/outsize,size_outrvous_min/outsize); + if (outsize) + fprintf(screen," output rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, + size_outrvous_max/outsize,size_outrvous_min/outsize); + else + fprintf(screen," output rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + 0,0.0,0,0); fprintf(screen," output rvous data (MB): %g %g %g %g\n", 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); From ab748ffe488ddefdb34f4ebc2f988f36df3c42eb Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 25 Mar 2019 10:29:06 -0600 Subject: [PATCH 11/12] turn off diagnostic output for rendezvous op --- src/RIGID/fix_rigid_small.cpp | 2 +- src/RIGID/fix_shake.cpp | 8 ++++---- src/special.cpp | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index bee5570316..9a30fdf911 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -1592,7 +1592,7 @@ void FixRigidSmall::create_bodies(tagint *bodyID) int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous), 0,proclist, rendezvous_body,0,buf,sizeof(OutRvous), - (void *) this,1); + (void *) this); OutRvous *outbuf = (OutRvous *) buf; memory->destroy(proclist); diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 2d3244885d..dcdb190d45 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -1072,7 +1072,7 @@ void FixShake::atom_owners() char *buf; comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous), 0,proclist, - rendezvous_ids,0,buf,0,(void *) this,1); + rendezvous_ids,0,buf,0,(void *) this); memory->destroy(proclist); memory->sfree(idbuf); @@ -1180,7 +1180,7 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, 0,proclist, rendezvous_partners_info, 0,buf,sizeof(PartnerInfo), - (void *) this,1); + (void *) this); PartnerInfo *outbuf = (PartnerInfo *) buf; memory->destroy(proclist); @@ -1268,7 +1268,7 @@ void FixShake::nshake_info(int *npartner, tagint **partner_tag, int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo), 0,proclist, rendezvous_nshake,0,buf,sizeof(NShakeInfo), - (void *) this,1); + (void *) this); NShakeInfo *outbuf = (NShakeInfo *) buf; memory->destroy(proclist); @@ -1361,7 +1361,7 @@ void FixShake::shake_info(int *npartner, tagint **partner_tag, int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo), 0,proclist, rendezvous_shake,0,buf,sizeof(ShakeInfo), - (void *) this,1); + (void *) this); ShakeInfo *outbuf = (ShakeInfo *) buf; memory->destroy(proclist); diff --git a/src/special.cpp b/src/special.cpp index a0739d6ccc..b9287df472 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -188,7 +188,7 @@ void Special::atom_owners() char *buf; comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, - rendezvous_ids,0,buf,0,(void *) this,1); + rendezvous_ids,0,buf,0,(void *) this); memory->destroy(proclist); memory->sfree(idbuf); @@ -246,7 +246,7 @@ void Special::onetwo_build_newton() int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this,1); + (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -381,7 +381,7 @@ void Special::onethree_build() int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this,1); + (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -486,7 +486,7 @@ void Special::onefour_build() int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this,1); + (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -909,7 +909,7 @@ void Special::angle_trim() int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this,1); + (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); @@ -1128,7 +1128,7 @@ void Special::dihedral_trim() int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), 0,proclist, rendezvous_pairs,0,buf,sizeof(PairRvous), - (void *) this,1); + (void *) this); PairRvous *outbuf = (PairRvous *) buf; memory->destroy(proclist); From 3e8c1c801bc50822dff13447dd1b902495638510 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 25 Mar 2019 14:03:29 -0400 Subject: [PATCH 12/12] add some degree of endian detection to hashlittle.cpp --- src/hashlittle.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/hashlittle.cpp b/src/hashlittle.cpp index be930217a1..f1d4e61142 100644 --- a/src/hashlittle.cpp +++ b/src/hashlittle.cpp @@ -2,10 +2,26 @@ // from lookup3.c, by Bob Jenkins, May 2006, Public Domain // bob_jenkins@burtleburtle.net -#include "stddef.h" -#include "stdint.h" +#include +#include +#include -#define HASH_LITTLE_ENDIAN 1 // Intel and AMD are little endian +// if the system defines the __BYTE_ORDER__ define, +// we use it instead of guessing the platform + +#if defined(__BYTE_ORDER__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define HASH_LITTLE_ENDIAN 1 +# else +# define HASH_LITTLE_ENDIAN 0 +# endif +#else // heuristic platform guess +# if defined(__bg__) +# define HASH_LITTLE_ENDIAN 0 // IBM BlueGene is big endian +# else +# define HASH_LITTLE_ENDIAN 1 // Intel and AMD x86 are little endian +# endif +#endif #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))