diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index fb185d7702..d74dd0fc3b 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -28,12 +28,14 @@ #include "modify.h" #include "group.h" #include "comm.h" +#include "neighbor.h" #include "force.h" #include "input.h" #include "output.h" #include "variable.h" #include "random_mars.h" #include "math_const.h" +#include "hashlittle.h" #include "memory.h" #include "error.h" @@ -43,6 +45,8 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; +#define RVOUS 1 // 0 for irregular, 1 for all2all + #define MAXLINE 1024 #define CHUNK 1024 #define ATTRIBUTE_PERBODY 20 @@ -70,8 +74,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), - id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), - idclose(NULL), rsqclose(NULL) + id_dilate(NULL), onemols(NULL) { int i; @@ -107,18 +110,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // parse args for rigid body specification int *mask = atom->mask; - tagint *bodyid = NULL; + tagint *bodyID = NULL; int nlocal = atom->nlocal; if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command"); if (strcmp(arg[3],"molecule") == 0) { if (atom->molecule_flag == 0) error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); - bodyid = atom->molecule; + bodyID = atom->molecule; } else if (strcmp(arg[3],"custom") == 0) { if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command"); - bodyid = new tagint[nlocal]; + bodyID = new tagint[nlocal]; customflag = 1; // determine whether atom-style variable or atom property is used. @@ -126,9 +129,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : int is_double=0; int custom_index = atom->find_custom(arg[4]+2,is_double); if (custom_index == -1) - error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "previously defined property/atom"); else if (is_double) - error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "integer-valued property/atom"); int minval = INT_MAX; int *value = atom->ivector[custom_index]; @@ -139,15 +144,17 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)(value[i] - minval + 1); - else bodyid[i] = 0; + bodyID[i] = (tagint)(value[i] - minval + 1); + else bodyID[i] = 0; } else if (strstr(arg[4],"v_") == arg[4]) { int ivariable = input->variable->find(arg[4]+2); if (ivariable < 0) - error->all(FLERR,"Variable name for fix rigid/small custom does not exist"); + error->all(FLERR,"Variable name for fix rigid/small custom " + "does not exist"); if (input->variable->atomstyle(ivariable) == 0) - error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable"); + error->all(FLERR,"Fix rigid/small custom variable is not " + "atom-style variable"); double *value = new double[nlocal]; input->variable->compute_atom(ivariable,0,value,1,0); int minval = INT_MAX; @@ -158,8 +165,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)((tagint)value[i] - minval + 1); - else bodyid[0] = 0; + bodyID[i] = (tagint)((tagint)value[i] - minval + 1); + else bodyID[0] = 0; delete[] value; } else error->all(FLERR,"Unsupported fix rigid custom property"); } else error->all(FLERR,"Illegal fix rigid/small command"); @@ -167,10 +174,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (atom->map_style == 0) error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify"); - // maxmol = largest bodyid # + // maxmol = largest bodyID # + maxmol = -1; for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]); + if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]); tagint itmp; MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world); @@ -400,8 +408,19 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // sets bodytag for owned atoms // body attributes are computed later by setup_bodies() - create_bodies(bodyid); - if (customflag) delete [] bodyid; + double time1 = MPI_Wtime(); + + create_bodies(bodyID); + if (customflag) delete [] bodyID; + + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," create bodies CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," create bodies CPU = %g secs\n",time2-time1); + } // set nlocal_body and allocate bodies I own @@ -569,12 +588,22 @@ void FixRigidSmall::init() if (rflag && (modify->fmask[i] & POST_FORCE) && !modify->fix[i]->rigid_flag) { char str[128]; - snprintf(str,128,"Fix %s alters forces after fix rigid",modify->fix[i]->id); + snprintf(str,128,"Fix %s alters forces after fix rigid", + modify->fix[i]->id); error->warning(FLERR,str); } } } + // error if maxextent > comm->cutghost + // NOTE: could just warn if an override flag set + // NOTE: this could fail for comm multi mode if user sets a wrong cutoff + // for atom types in rigid bodies - need a more careful test + + double cutghost = MAX(neighbor->cutneighmax,comm->cutghostuser); + if (maxextent > cutghost) + error->all(FLERR,"Rigid body extent > ghost cutoff - use comm_modify cutoff"); + // error if npt,nph fix comes before rigid fix for (i = 0; i < modify->nfix; i++) { @@ -1514,175 +1543,72 @@ void FixRigidSmall::set_v() set bodytag for all owned atoms ------------------------------------------------------------------------- */ -void FixRigidSmall::create_bodies(tagint *bodyid) +void FixRigidSmall::create_bodies(tagint *bodyID) { - int i,m,n; - double unwrap[3]; + int i,m; - // error check on image flags of atoms in rigid bodies - - imageint *image = atom->image; + // allocate buffer for input to rendezvous comm + // ncount = # of my atoms in bodies + int *mask = atom->mask; int nlocal = atom->nlocal; - int *periodicity = domain->periodicity; - int xbox,ybox,zbox; - - int flag = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - xbox = (image[i] & IMGMASK) - IMGMAX; - ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; - zbox = (image[i] >> IMG2BITS) - IMGMAX; - if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) || - (zbox && !periodicity[2])) flag = 1; - } - - int flagall; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag " - "in a non-periodic dimension"); - - // allocate buffer for passing messages around ring of procs - // percount = max number of values to put in buffer for each of ncount - int ncount = 0; for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) ncount++; - int percount = 5; - double *buf; - memory->create(buf,ncount*percount,"rigid/small:buf"); + int *proclist; + memory->create(proclist,ncount,"rigid/small:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf"); - // create map hash for storing unique body IDs of my atoms - // key = body ID - // value = index into per-body data structure - // n = # of entries in hash - - hash = new std::map(); - hash->clear(); - - // setup hash - // key = body ID - // value = index into N-length data structure - // n = count of unique bodies my atoms are part of - - n = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++; - } - - // bbox = bounding box of each rigid body my atoms are part of - - memory->create(bbox,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG; - bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG; - } - - // pack my atoms into buffer as body ID, unwrapped coords + // setup buf to pass to rendezvous comm + // one BodyMsg datum for each constituent atom + // datum = me, local index of atom, atomID, bodyID, unwrapped coords + // owning proc for each datum = random hash of bodyID double **x = atom->x; - - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } - - // pass buffer around ring of procs - // func = update bbox with atom coords from every proc - // when done, have full bbox for every rigid body my atoms are part of - - comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this); - - // check if any bbox is size 0.0, meaning rigid body is a single particle - - flag = 0; - for (i = 0; i < n; i++) - if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] && - bbox[i][4] == bbox[i][5]) flag = 1; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) - error->all(FLERR,"One or more rigid bodies are a single particle"); - - // ctr = center pt of each rigid body my atoms are part of - - memory->create(ctr,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]); - ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]); - ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]); - } - - // idclose = ID of atom in body closest to center pt (smaller ID if tied) - // rsqclose = distance squared from idclose to center pt - - memory->create(idclose,n,"rigid/small:idclose"); - memory->create(rsqclose,n,"rigid/small:rsqclose"); - - for (i = 0; i < n; i++) rsqclose[i] = BIG; - - // pack my atoms into buffer as body ID, atom ID, unwrapped coords - tagint *tag = atom->tag; + imageint *image = atom->image; m = 0; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = ubuf(tag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; + proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].ilocal = i; + inbuf[m].atomID = tag[i]; + inbuf[m].bodyID = bodyID[i]; + domain->unmap(x[i],image[i],inbuf[m].x); + m++; } - // pass buffer around ring of procs - // func = update idclose,rsqclose with atom IDs from every proc - // when done, have idclose for every rigid body my atoms are part of + // perform rendezvous operation + // each proc owns random subset of bodies + // receives all atoms in those bodies + // func = compute bbox of each body, find atom closest to geometric center - comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this); + char *buf; + int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous), + 0,proclist, + rendezvous_body,0,buf,sizeof(OutRvous), + (void *) this); + OutRvous *outbuf = (OutRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); - // set bodytag of all owned atoms, based on idclose - // find max value of rsqclose across all procs + // set bodytag of all owned atoms based on outbuf info for constituent atoms - double rsqmax = 0.0; - for (i = 0; i < nlocal; i++) { - bodytag[i] = 0; - if (!(mask[i] & groupbit)) continue; - m = hash->find(bodyid[i])->second; - bodytag[i] = idclose[m]; - rsqmax = MAX(rsqmax,rsqclose[m]); - } + for (i = 0; i < nlocal; i++) + if (!(mask[i] & groupbit)) bodytag[i] = 0; - // pack my atoms into buffer as bodytag of owning atom, unwrapped coords + for (m = 0; m < nreturn; m++) + bodytag[outbuf[m].ilocal] = outbuf[m].atomID; - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = ubuf(bodytag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } + memory->sfree(outbuf); - // pass buffer around ring of procs - // func = update rsqfar for atoms belonging to bodies I own - // when done, have rsqfar for all atoms in bodies I own - - rsqfar = 0.0; - comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this); - - // find maxextent of rsqfar across all procs + // maxextent = max of rsqfar across all procs // if defined, include molecule->maxextent MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world); @@ -1691,125 +1617,156 @@ void FixRigidSmall::create_bodies(tagint *bodyid) for (int i = 0; i < nmol; i++) maxextent = MAX(maxextent,onemols[i]->maxextent); } +} + +/* ---------------------------------------------------------------------- + process rigid bodies assigned to me + buf = list of N BodyMsg datums +------------------------------------------------------------------------- */ + +int FixRigidSmall::rendezvous_body(int n, char *inbuf, + int &rflag, int *&proclist, char *&outbuf, + void *ptr) +{ + int i,j,m; + double delx,dely,delz,rsq; + int *iclose; + tagint *idclose; + double *x,*xown,*rsqclose; + double **bbox,**ctr; + + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + Memory *memory = frsptr->memory; + Error *error = frsptr->error; + MPI_Comm world = frsptr->world; + + // setup hash + // use STL map instead of atom->map + // b/c know nothing about body ID values specified by user + // ncount = number of bodies assigned to me + // key = body ID + // value = index into Ncount-length data structure + + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) { + id = in[i].bodyID; + if (hash.find(id) == hash.end()) hash[id] = ncount++; + } + + // bbox = bounding box of each rigid body + + memory->create(bbox,ncount,6,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG; + bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG; + } + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + bbox[m][0] = MIN(bbox[m][0],x[0]); + bbox[m][1] = MAX(bbox[m][1],x[0]); + bbox[m][2] = MIN(bbox[m][2],x[1]); + bbox[m][3] = MAX(bbox[m][3],x[1]); + bbox[m][4] = MIN(bbox[m][4],x[2]); + bbox[m][5] = MAX(bbox[m][5],x[2]); + } + + // check if any bbox is size 0.0, meaning rigid body is a single particle + + int flag = 0; + for (m = 0; m < ncount; m++) + if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] && + bbox[m][4] == bbox[m][5]) flag = 1; + int flagall; + MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here? + if (flagall) + error->all(FLERR,"One or more rigid bodies are a single particle"); + + // ctr = geometric center pt of each rigid body + + memory->create(ctr,ncount,3,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]); + ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]); + ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]); + } + + // idclose = atomID closest to center point of each body + + memory->create(idclose,ncount,"rigid/small:idclose"); + memory->create(iclose,ncount,"rigid/small:iclose"); + memory->create(rsqclose,ncount,"rigid/small:rsqclose"); + for (m = 0; m < ncount; m++) rsqclose[m] = BIG; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + delx = x[0] - ctr[m][0]; + dely = x[1] - ctr[m][1]; + delz = x[2] - ctr[m][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= rsqclose[m]) { + if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue; + iclose[m] = i; + idclose[m] = in[i].atomID; + rsqclose[m] = rsq; + } + } + + // compute rsqfar for all bodies I own + // set rsqfar back in caller + + double rsqfar = 0.0; + + for (int i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + xown = in[iclose[m]].x; + x = in[i].x; + delx = x[0] - xown[0]; + dely = x[1] - xown[1]; + delz = x[2] - xown[2]; + rsq = delx*delx + dely*dely + delz*delz; + rsqfar = MAX(rsqfar,rsq); + } + + frsptr->rsqfar = rsqfar; + + // pass list of OutRvous datums back to comm->rendezvous + + int nout = n; + memory->create(proclist,nout,"rigid/small:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out"); + + for (int i = 0; i < nout; i++) { + proclist[i] = in[i].me; + out[i].ilocal = in[i].ilocal; + m = hash.find(in[i].bodyID)->second; + out[i].atomID = idclose[m]; + } + + outbuf = (char *) out; // clean up + // Comm::rendezvous will delete proclist and out (outbuf) - delete hash; - memory->destroy(buf); memory->destroy(bbox); memory->destroy(ctr); memory->destroy(idclose); + memory->destroy(iclose); memory->destroy(rsqclose); -} -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update bounding box for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ + // flag = 2: new outbuf -void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **bbox = frsptr->bbox; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int j,imol; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - x = &buf[m+1]; - bbox[j][0] = MIN(bbox[j][0],x[0]); - bbox[j][1] = MAX(bbox[j][1],x[0]); - bbox[j][2] = MIN(bbox[j][2],x[1]); - bbox[j][3] = MAX(bbox[j][3],x[1]); - bbox[j][4] = MIN(bbox[j][4],x[2]); - bbox[j][5] = MAX(bbox[j][5],x[2]); - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update nearest atom to body center for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **ctr = frsptr->ctr; - tagint *idclose = frsptr->idclose; - double *rsqclose = frsptr->rsqclose; - - double *buf = (double *) cbuf; - int ndatums = n/5; - - int j,imol; - tagint tag; - double delx,dely,delz,rsq; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 5) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - tag = (tagint) ubuf(buf[m+1]).i; - x = &buf[m+2]; - delx = x[0] - ctr[j][0]; - dely = x[1] - ctr[j][1]; - delz = x[2] - ctr[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= rsqclose[j]) { - if (rsq == rsqclose[j] && tag > idclose[j]) continue; - idclose[j] = tag; - rsqclose[j] = rsq; - } - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update rsqfar = distance from owning atom to other atom -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - double **x = frsptr->atom->x; - imageint *image = frsptr->atom->image; - int nlocal = frsptr->atom->nlocal; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int iowner; - tagint tag; - double delx,dely,delz,rsq; - double *xx; - double unwrap[3]; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - tag = (tagint) ubuf(buf[m]).i; - iowner = frsptr->atom->map(tag); - if (iowner < 0 || iowner >= nlocal) continue; - frsptr->domain->unmap(x[iowner],image[iowner],unwrap); - xx = &buf[m+1]; - delx = xx[0] - unwrap[0]; - dely = xx[1] - unwrap[1]; - delz = xx[2] - unwrap[2]; - rsq = delx*delx + dely*dely + delz*delz; - frsptr->rsqfar = MAX(frsptr->rsqfar,rsq); - } + rflag = 2; + return nout; } /* ---------------------------------------------------------------------- @@ -2472,9 +2429,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) int nlocal = atom->nlocal; - hash = new std::map(); + std::map hash; for (i = 0; i < nlocal; i++) - if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i]; + if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i]; // open file and read header @@ -2533,11 +2490,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) id = ATOTAGINT(values[0]); if (id <= 0 || id > maxmol) error->all(FLERR,"Invalid rigid body ID in fix rigid/small file"); - if (hash->find(id) == hash->end()) { + if (hash.find(id) == hash.end()) { buf = next + 1; continue; } - m = (*hash)[id]; + m = hash[id]; inbody[m] = 1; if (which == 0) { @@ -2576,7 +2533,6 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) delete [] buffer; delete [] values; - delete hash; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 3f6826f9bb..f6ad1b7206 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall) #include "fix.h" // replace this later -#include +//#include namespace LAMMPS_NS { @@ -180,13 +180,21 @@ class FixRigidSmall : public Fix { // class data used by ring communication callbacks - std::map *hash; - double **bbox; - double **ctr; - tagint *idclose; - double *rsqclose; double rsqfar; + struct InRvous { + int me,ilocal; + tagint atomID,bodyID; + double x[3]; + }; + + struct OutRvous { + int ilocal; + tagint atomID; + }; + + // local methods + void image_shift(); void set_xv(); void set_v(); @@ -199,11 +207,9 @@ class FixRigidSmall : public Fix { void grow_body(); void reset_atom2body(); - // callback functions for ring communication + // callback function for rendezvous communication - static void ring_bbox(int, char *, void *); - static void ring_nearest(int, char *, void *); - static void ring_farthest(int, char *, void *); + static int rendezvous_body(int, char *, int &, int *&, char *&, void *); // debug diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index e0d1bf132b..dcdb190d45 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -39,6 +39,8 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; +#define RVOUS 1 // 0 for irregular, 1 for all2all + #define BIG 1.0e20 #define MASSDELTA 0.1 @@ -219,8 +221,19 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : // identify all SHAKE clusters + double time1 = MPI_Wtime(); + find_clusters(); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," find clusters CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," find clusters CPU = %g secs\n",time2-time1); + } + // initialize list of SHAKE clusters to constrain maxlist = 0; @@ -707,13 +720,6 @@ void FixShake::find_clusters() int nlocal = atom->nlocal; int angles_allow = atom->avec->angles_allow; - // setup ring of procs - - int next = me + 1; - int prev = me -1; - if (next == nprocs) next = 0; - if (prev < 0) prev = nprocs - 1; - // ----------------------------------------------------- // allocate arrays for self (1d) and bond partners (2d) // max = max # of bond partners for owned atoms = 2nd dim of partner arrays @@ -755,6 +761,10 @@ void FixShake::find_clusters() memory->create(partner_shake,nlocal,max,"shake:partner_shake"); memory->create(partner_nshake,nlocal,max,"shake:partner_nshake"); + // setup atomIDs and procowner vectors in rendezvous decomposition + + atom_owners(); + // ----------------------------------------------------- // set npartner and partner_tag from special arrays // ----------------------------------------------------- @@ -778,86 +788,13 @@ void FixShake::find_clusters() } // ----------------------------------------------------- - // set partner_mask, partner_type, partner_massflag, partner_bondtype - // for bonded partners - // requires communication for off-proc partners + // set partner_mask, partner_type, partner_massflag, + // partner_bondtype for all my bonded partners + // requires rendezvous communication for off-proc partners // ----------------------------------------------------- - // fill in mask, type, massflag, bondtype if own bond partner - // info to store in buf for each off-proc bond = nper = 6 - // 2 atoms IDs in bond, space for mask, type, massflag, bondtype - // nbufmax = largest buffer needed to hold info from any proc - - int nper = 6; - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - partner_mask[i][j] = 0; - partner_type[i][j] = 0; - partner_massflag[i][j] = 0; - partner_bondtype[i][j] = 0; - - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { - partner_mask[i][j] = mask[m]; - partner_type[i][j] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; - else massone = mass[type[m]]; - partner_massflag[i][j] = masscheck(massone); - } - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - else { - n = bondtype_findset(m,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - } - } else nbuf += nper; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; - buf[size+2] = 0; - buf[size+3] = 0; - buf[size+4] = 0; - n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) buf[size+5] = n; - else buf[size+5] = 0; - size += nper; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this); - - // store partner info returned to me - - m = 0; - while (m < size) { - i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_mask[i][j] = buf[m+2]; - partner_type[i][j] = buf[m+3]; - partner_massflag[i][j] = buf[m+4]; - partner_bondtype[i][j] = buf[m+5]; - m += nper; - } - - memory->destroy(buf); + partner_info(npartner,partner_tag,partner_mask,partner_type, + partner_massflag,partner_bondtype); // error check for unfilled partner info // if partner_type not set, is an error @@ -868,17 +805,18 @@ void FixShake::find_clusters() // else it's an error flag = 0; + int flag2 = 0; for (i = 0; i < nlocal; i++) for (j = 0; j < npartner[i]; j++) { - if (partner_type[i][j] == 0) flag = 1; + if (partner_type[i][j] == 0) flag++; if (!(mask[i] & groupbit)) continue; if (!(partner_mask[i][j] & groupbit)) continue; - if (partner_bondtype[i][j] == 0) flag = 1; + if (partner_bondtype[i][j] == 0) flag2++; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Did not find fix shake partner info"); - + // ----------------------------------------------------- // identify SHAKEable bonds // set nshake[i] = # of SHAKE bonds attached to atom i @@ -931,56 +869,11 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set partner_nshake for bonded partners - // requires communication for off-proc partners + // requires rendezvous communication for off-proc partners // ----------------------------------------------------- - // fill in partner_nshake if own bond partner - // info to store in buf for each off-proc bond = - // 2 atoms IDs in bond, space for nshake value - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; - else nbuf += 3; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - for (j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size+1] = partner_tag[i][j]; - size += 3; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this); - - // store partner info returned to me - - m = 0; - while (m < size) { - i = atom->map(buf[m]); - for (j = 0; j < npartner[i]; j++) - if (buf[m+1] == partner_tag[i][j]) break; - partner_nshake[i][j] = buf[m+2]; - m += 3; - } - - memory->destroy(buf); - + nshake_info(npartner,partner_tag,partner_nshake); + // ----------------------------------------------------- // error checks // no atom with nshake > 3 @@ -988,7 +881,7 @@ void FixShake::find_clusters() // ----------------------------------------------------- flag = 0; - for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1; + for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag++; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms"); @@ -996,7 +889,7 @@ void FixShake::find_clusters() for (i = 0; i < nlocal; i++) { if (nshake[i] <= 1) continue; for (j = 0; j < npartner[i]; j++) - if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; + if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag++; } MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Shake clusters are connected"); @@ -1064,68 +957,18 @@ void FixShake::find_clusters() // ----------------------------------------------------- // set shake_flag,shake_atom,shake_type for non-central atoms - // requires communication for off-proc atoms + // requires rendezvous communication for off-proc atoms // ----------------------------------------------------- - // fill in shake arrays for each bond partner I own - // info to store in buf for each off-proc bond = - // all values from shake_flag, shake_atom, shake_type - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; - m = atom->map(partner_tag[i][j]); - if (m >= 0 && m < nlocal) { - shake_flag[m] = shake_flag[i]; - shake_atom[m][0] = shake_atom[i][0]; - shake_atom[m][1] = shake_atom[i][1]; - shake_atom[m][2] = shake_atom[i][2]; - shake_atom[m][3] = shake_atom[i][3]; - shake_type[m][0] = shake_type[i][0]; - shake_type[m][1] = shake_type[i][1]; - shake_type[m][2] = shake_type[i][2]; - } else nbuf += 9; - } - } - - memory->create(buf,nbuf,"shake:buf"); - - // fill buffer with info - - size = 0; - for (i = 0; i < nlocal; i++) { - if (shake_flag[i] == 0) continue; - for (j = 0; j < npartner[i]; j++) { - if (partner_shake[i][j] == 0) continue; - m = atom->map(partner_tag[i][j]); - if (m < 0 || m >= nlocal) { - buf[size] = partner_tag[i][j]; - buf[size+1] = shake_flag[i]; - buf[size+2] = shake_atom[i][0]; - buf[size+3] = shake_atom[i][1]; - buf[size+4] = shake_atom[i][2]; - buf[size+5] = shake_atom[i][3]; - buf[size+6] = shake_type[i][0]; - buf[size+7] = shake_type[i][1]; - buf[size+8] = shake_type[i][2]; - size += 9; - } - } - } - - // cycle buffer around ring of procs back to self - - comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this); - - memory->destroy(buf); + shake_info(npartner,partner_tag,partner_shake); // ----------------------------------------------------- // free local memory // ----------------------------------------------------- + memory->destroy(atomIDs); + memory->destroy(procowner); + memory->destroy(npartner); memory->destroy(nshake); memory->destroy(partner_tag); @@ -1199,98 +1042,551 @@ void FixShake::find_clusters() } /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner: - fill in mask and type and massflag - search for bond with 1st atom and fill in bondtype + setup atomIDs and procowner ------------------------------------------------------------------------- */ -void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr) +void FixShake::atom_owners() { - FixShake *fsptr = (FixShake *)ptr; - Atom *atom = fsptr->atom; + tagint *tag = atom->tag; + int nlocal = atom->nlocal; + + int *proclist; + memory->create(proclist,nlocal,"shake:proclist"); + IDRvous *idbuf = (IDRvous *) + memory->smalloc((bigint) nlocal*sizeof(IDRvous),"shake:idbuf"); + + // setup input buf to rendezvous comm + // input datums = pairs of bonded atoms + // owning proc for each datum = random hash of atomID + // one datum for each owned atom: datum = owning proc, atomID + + for (int i = 0; i < nlocal; i++) { + proclist[i] = tag[i] % nprocs; + idbuf[i].me = me; + idbuf[i].atomID = tag[i]; + } + + // perform rendezvous operation + // each proc assigned every 1/Pth atom + + char *buf; + comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous), + 0,proclist, + rendezvous_ids,0,buf,0,(void *) this); + + memory->destroy(proclist); + memory->sfree(idbuf); +} + +/* ---------------------------------------------------------------------- + setup partner_mask, partner_type, partner_massflag, partner_bondtype +------------------------------------------------------------------------- */ + +void FixShake::partner_info(int *npartner, tagint **partner_tag, + int **partner_mask, int **partner_type, + int **partner_massflag, int **partner_bondtype) +{ + int i,j,m,n; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PartnerInfo *inbuf = (PartnerInfo *) + memory->smalloc((bigint) nsend*sizeof(PartnerInfo),"special:inbuf"); + + // set values in 4 partner arrays for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = pair of bonded atoms where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) + // 4 values for my owned atom + double *rmass = atom->rmass; double *mass = atom->mass; - int *mask = atom->mask; int *type = atom->type; + int *mask = atom->mask; + tagint *tag = atom->tag; + + double massone; + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + partner_mask[i][j] = 0; + partner_type[i][j] = 0; + partner_massflag[i][j] = 0; + partner_bondtype[i][j] = 0; + + m = atom->map(partner_tag[i][j]); + + if (m >= 0 && m < nlocal) { + partner_mask[i][j] = mask[m]; + partner_type[i][j] = type[m]; + if (nmass) { + if (rmass) massone = rmass[m]; + else massone = mass[type[m]]; + partner_massflag[i][j] = masscheck(massone); + } + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + else { + n = bondtype_findset(m,tag[i],partner_tag[i][j],0); + if (n) partner_bondtype[i][j] = n; + } + + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].partnerID = tag[i]; + inbuf[nsend].mask = mask[i]; + inbuf[nsend].type = type[i]; + if (nmass) { + if (rmass) massone = rmass[i]; + else massone = mass[type[i]]; + inbuf[nsend].massflag = masscheck(massone); + } else inbuf[nsend].massflag = 0; + + // my atom may own bond, in which case set partner_bondtype + // else receiver of this datum will own the bond and return the value + + n = bondtype_findset(i,tag[i],partner_tag[i][j],0); + if (n) { + partner_bondtype[i][j] = n; + inbuf[nsend].bondtype = n; + } else inbuf[nsend].bondtype = 0; + + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned partner 4 values + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PartnerInfo), + 0,proclist, + rendezvous_partners_info, + 0,buf,sizeof(PartnerInfo), + (void *) this); + PartnerInfo *outbuf = (PartnerInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set partner 4 values for un-onwed partners based on output info + // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (j = 0; j < npartner[i]; j++) + if (partner_tag[i][j] == outbuf[m].partnerID) break; + partner_mask[i][j] = outbuf[m].mask; + partner_type[i][j] = outbuf[m].type; + partner_massflag[i][j] = outbuf[m].massflag; + + // only set partner_bondtype if my atom did not set it + // when setting up rendezvous + // if this proc set it, then sender of this datum set outbuf.bondtype = 0 + + if (partner_bondtype[i][j] == 0) + partner_bondtype[i][j] = outbuf[m].bondtype; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + setup partner_nshake +------------------------------------------------------------------------- */ + +void FixShake::nshake_info(int *npartner, tagint **partner_tag, + int **partner_nshake) +{ + int i,j,m,n; int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + NShakeInfo *inbuf = (NShakeInfo *) + memory->smalloc((bigint) nsend*sizeof(NShakeInfo),"special:inbuf"); + + // set partner_nshake for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = pair of bonded atoms where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc) + // nshake value for my owned atom + + tagint *tag = atom->tag; + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + partner_nshake[i][j] = 0; + m = atom->map(partner_tag[i][j]); + if (m >= 0 && m < nlocal) { + partner_nshake[i][j] = nshake[m]; + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].partnerID = tag[i]; + inbuf[nsend].nshake = nshake[i]; + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned partner nshake + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo), + 0,proclist, + rendezvous_nshake,0,buf,sizeof(NShakeInfo), + (void *) this); + NShakeInfo *outbuf = (NShakeInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set partner nshake for un-onwed partners based on output info + // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (j = 0; j < npartner[i]; j++) + if (partner_tag[i][j] == outbuf[m].partnerID) break; + partner_nshake[i][j] = outbuf[m].nshake; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + setup shake_flag, shake_atom, shake_type +------------------------------------------------------------------------- */ + +void FixShake::shake_info(int *npartner, tagint **partner_tag, + int **partner_shake) +{ + int i,j,m,n; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + // one datum for every off-processor partner + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < npartner[i]; j++) { + m = atom->map(partner_tag[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + ShakeInfo *inbuf = (ShakeInfo *) + memory->smalloc((bigint) nsend*sizeof(ShakeInfo),"special:inbuf"); + + // set 3 shake arrays for all partner atoms I own + // also setup input buf to rendezvous comm + // input datums = partner atom where I do not own partner + // owning proc for each datum = partner_tag % nprocs + // datum: atomID = partner_tag (off-proc) + // values in 3 shake arrays + + nsend = 0; + for (i = 0; i < nlocal; i++) { + if (shake_flag[i] == 0) continue; + for (j = 0; j < npartner[i]; j++) { + if (partner_shake[i][j] == 0) continue; + m = atom->map(partner_tag[i][j]); + + if (m >= 0 && m < nlocal) { + shake_flag[m] = shake_flag[i]; + shake_atom[m][0] = shake_atom[i][0]; + shake_atom[m][1] = shake_atom[i][1]; + shake_atom[m][2] = shake_atom[i][2]; + shake_atom[m][3] = shake_atom[i][3]; + shake_type[m][0] = shake_type[i][0]; + shake_type[m][1] = shake_type[i][1]; + shake_type[m][2] = shake_type[i][2]; + + } else { + proclist[nsend] = partner_tag[i][j] % nprocs; + inbuf[nsend].atomID = partner_tag[i][j]; + inbuf[nsend].shake_flag = shake_flag[i]; + inbuf[nsend].shake_atom[0] = shake_atom[i][0]; + inbuf[nsend].shake_atom[1] = shake_atom[i][1]; + inbuf[nsend].shake_atom[2] = shake_atom[i][2]; + inbuf[nsend].shake_atom[3] = shake_atom[i][3]; + inbuf[nsend].shake_type[0] = shake_type[i][0]; + inbuf[nsend].shake_type[1] = shake_type[i][1]; + inbuf[nsend].shake_type[2] = shake_type[i][2]; + nsend++; + } + } + } + + // perform rendezvous operation + // each proc owns random subset of atoms + // receives all data needed to populate un-owned shake info + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo), + 0,proclist, + rendezvous_shake,0,buf,sizeof(ShakeInfo), + (void *) this); + ShakeInfo *outbuf = (ShakeInfo *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set shake info for un-onwed partners based on output info + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + shake_flag[i] = outbuf[m].shake_flag; + shake_atom[i][0] = outbuf[m].shake_atom[0]; + shake_atom[i][1] = outbuf[m].shake_atom[1]; + shake_atom[i][2] = outbuf[m].shake_atom[2]; + shake_atom[i][3] = outbuf[m].shake_atom[3]; + shake_type[i][0] = outbuf[m].shake_type[0]; + shake_type[i][1] = outbuf[m].shake_type[1]; + shake_type[i][2] = outbuf[m].shake_type[2]; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N IDRvous datums + no outbuf +------------------------------------------------------------------------- */ + +int FixShake::rendezvous_ids(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + FixShake *fsptr = (FixShake *) ptr; + Memory *memory = fsptr->memory; + + tagint *atomIDs; + int *procowner; + + memory->create(atomIDs,n,"special:atomIDs"); + memory->create(procowner,n,"special:procowner"); + + IDRvous *in = (IDRvous *) inbuf; + + for (int i = 0; i < n; i++) { + atomIDs[i] = in[i].atomID; + procowner[i] = in[i].me; + } + + // store rendezvous data in FixShake class + + fsptr->nrvous = n; + fsptr->atomIDs = atomIDs; + fsptr->procowner = procowner; + + // flag = 0: no second comm needed in rendezvous + + flag = 0; + return 0; +} + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs +------------------------------------------------------------------------- */ + +int FixShake::rendezvous_partners_info(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + int i,m; + + FixShake *fsptr = (FixShake *) ptr; + Atom *atom = fsptr->atom; + Memory *memory = fsptr->memory; + + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts + + atom->map_clear(); + + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID = 4 values + + PartnerInfo *in = (PartnerInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + double massone; int nmass = fsptr->nmass; - tagint *buf = (tagint *) cbuf; - int m,n; - double massone; - - for (int i = 0; i < ndatum; i += 6) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) { - buf[i+2] = mask[m]; - buf[i+3] = type[m]; - if (nmass) { - if (rmass) massone = rmass[m]; - else massone = mass[type[m]]; - buf[i+4] = fsptr->masscheck(massone); - } - if (buf[i+5] == 0) { - n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0); - if (n) buf[i+5] = n; - } - } + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf + + flag = 1; + return n; } /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner, fill in nshake value + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N NShakeInfo datums + outbuf = same list of N NShakeInfo datums, routed to different procs ------------------------------------------------------------------------- */ -void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr) +int FixShake::rendezvous_nshake(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) { - FixShake *fsptr = (FixShake *)ptr; + int i,j,m; + + FixShake *fsptr = (FixShake *) ptr; Atom *atom = fsptr->atom; - int nlocal = atom->nlocal; + Memory *memory = fsptr->memory; - int *nshake = fsptr->nshake; + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts - tagint *buf = (tagint *) cbuf; - int m; + atom->map_clear(); - for (int i = 0; i < ndatum; i += 3) { - m = atom->map(buf[i+1]); - if (m >= 0 && m < nlocal) buf[i+2] = nshake[m]; + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID + + NShakeInfo *in = (NShakeInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf + + flag = 1; + return n; } - /* ---------------------------------------------------------------------- - when receive buffer, scan bond partner IDs for atoms I own - if I own partner, fill in nshake value + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs ------------------------------------------------------------------------- */ -void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr) +int FixShake::rendezvous_shake(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) { - FixShake *fsptr = (FixShake *)ptr; + int i,j,m; + + FixShake *fsptr = (FixShake *) ptr; Atom *atom = fsptr->atom; - int nlocal = atom->nlocal; + Memory *memory = fsptr->memory; - int *shake_flag = fsptr->shake_flag; - tagint **shake_atom = fsptr->shake_atom; - int **shake_type = fsptr->shake_type; + // clear atom map so it can be here as a hash table + // faster than an STL map for large atom counts - tagint *buf = (tagint *) cbuf; - int m; + atom->map_clear(); - for (int i = 0; i < ndatum; i += 9) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) { - shake_flag[m] = buf[i+1]; - shake_atom[m][0] = buf[i+2]; - shake_atom[m][1] = buf[i+3]; - shake_atom[m][2] = buf[i+4]; - shake_atom[m][3] = buf[i+5]; - shake_type[m][0] = buf[i+6]; - shake_type[m][1] = buf[i+7]; - shake_type[m][2] = buf[i+8]; - } + // hash atom IDs stored in rendezvous decomposition + + int nrvous = fsptr->nrvous; + tagint *atomIDs = fsptr->atomIDs; + + for (i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + // outbuf = info about owned atomID + + ShakeInfo *in = (ShakeInfo *) inbuf; + int *procowner = fsptr->procowner; + memory->create(proclist,n,"shake:proclist"); + + for (i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } + + outbuf = inbuf; + + // re-create atom map + + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); + + // flag = 1: outbuf = inbuf; + + flag = 1; + return n; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index d4e7b85ec4..b99a35f958 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -120,6 +120,11 @@ class FixShake : public Fix { int nmol; void find_clusters(); + void atom_owners(); + void partner_info(int *, tagint **, int **, int **, int **, int **); + void nshake_info(int *, tagint **, int **); + void shake_info(int *, tagint **, int **); + int masscheck(double); void unconstrained_update(); void unconstrained_update_respa(int); @@ -131,12 +136,40 @@ class FixShake : public Fix { int bondtype_findset(int, tagint, tagint, int); int angletype_findset(int, tagint, tagint, int); - // static variable for ring communication callback to access class data - // callback functions for ring communication + // data used by rendezvous callback methods - static void ring_bonds(int, char *, void *); - static void ring_nshake(int, char *, void *); - static void ring_shake(int, char *, void *); + int nrvous; + tagint *atomIDs; + int *procowner; + + struct IDRvous { + int me; + tagint atomID; + }; + + struct PartnerInfo { + tagint atomID,partnerID; + int mask,type,massflag,bondtype; + }; + + struct NShakeInfo { + tagint atomID,partnerID; + int nshake; + }; + + struct ShakeInfo { + tagint atomID; + tagint shake_atom[4]; + int shake_flag; + int shake_type[3]; + }; + + // callback functions for rendezvous communication + + static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); + static int rendezvous_partners_info(int, char *, int &, int *&, char *&, void *); + static int rendezvous_nshake(int, char *, int &, int *&, char *&, void *); + static int rendezvous_shake(int, char *, int &, int *&, char *&, void *); }; } diff --git a/src/comm.cpp b/src/comm.cpp index f355a562fc..729f96581a 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -28,6 +28,7 @@ #include "dump.h" #include "group.h" #include "procmap.h" +#include "irregular.h" #include "accelerator_kokkos.h" #include "memory.h" #include "error.h" @@ -725,6 +726,433 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, memory->destroy(bufcopy); } +/* ---------------------------------------------------------------------- + rendezvous communication operation + three stages: + first comm sends inbuf from caller decomp to rvous decomp + callback operates on data in rendevous decomp + second comm sends outbuf from rvous decomp back to caller decomp + inputs: + which = perform (0) irregular or (1) MPI_All2allv communication + n = # of datums in inbuf + inbuf = vector of input datums + insize = byte size of each input datum + inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc + procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc, + callback = caller function to invoke in rendezvous decomposition + takes input datums, returns output datums + outorder = same as inorder, but for datums returned by callback() + ptr = pointer to caller class, passed to callback() + outputs: + nout = # of output datums (function return) + outbuf = vector of output datums + outsize = byte size of each output datum + callback inputs: + nrvous = # of rvous decomp datums in inbuf_rvous + inbuf_rvous = vector of rvous decomp input datums + ptr = pointer to caller class + callback outputs: + nrvous_out = # of rvous decomp output datums (function return) + flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous, + 2 for second comm with new outbuf_rvous + procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc + allocated + outbuf_rvous = vector of rvous decomp output datums + NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR + to avoid checked-for overflow in MPI_Alltoallv? +------------------------------------------------------------------------- */ + +int Comm:: +rendezvous(int which, int n, char *inbuf, int insize, + int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, int outsize, void *ptr, int statflag) +{ + if (which == 0) + return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr,statflag); + else + return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, + outorder,outbuf,outsize,ptr,statflag); +} + +/* ---------------------------------------------------------------------- */ + +int Comm:: +rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, + int outsize, void *ptr, int statflag) +{ + // irregular comm of inbuf from caller decomp to rendezvous decomp + + Irregular *irregular = new Irregular(lmp); + + int nrvous; + if (inorder) nrvous = irregular->create_data_grouped(n,procs); + else nrvous = irregular->create_data(n,procs); + + char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, + "rendezvous:inbuf"); + irregular->exchange_data(inbuf,insize,inbuf_rvous); + + bigint irregular1_bytes = irregular->memory_usage(); + irregular->destroy_data(); + delete irregular; + + // peform rendezvous computation via callback() + // callback() allocates/populates proclist_rvous and outbuf_rvous + + int flag; + int *procs_rvous; + char *outbuf_rvous; + int nrvous_out = callback(nrvous,inbuf_rvous,flag, + procs_rvous,outbuf_rvous,ptr); + + if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous + if (flag == 0) { + if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + irregular1_bytes); + return 0; // all nout_rvous are 0, no 2nd comm stage + } + + // irregular comm of outbuf from rendezvous decomp back to caller decomp + // caller will free outbuf + + irregular = new Irregular(lmp); + + int nout; + if (outorder) + nout = irregular->create_data_grouped(nrvous_out,procs_rvous); + else nout = irregular->create_data(nrvous_out,procs_rvous); + + outbuf = (char *) memory->smalloc((bigint) nout*outsize, + "rendezvous:outbuf"); + irregular->exchange_data(outbuf_rvous,outsize,outbuf); + + bigint irregular2_bytes = irregular->memory_usage(); + irregular->destroy_data(); + delete irregular; + + memory->destroy(procs_rvous); + memory->sfree(outbuf_rvous); + + // return number of output datums + // last arg to stats() = memory for procs_rvous + irregular comm + + if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + MAX(irregular1_bytes,irregular2_bytes)); + return nout; +} + +/* ---------------------------------------------------------------------- */ + +int Comm:: +rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, + int (*callback)(int, char *, int &, int *&, char *&, void *), + int outorder, char *&outbuf, int outsize, void *ptr, + int statflag) +{ + int iproc; + bigint all2all1_bytes,all2all2_bytes; + int *sendcount,*sdispls,*recvcount,*rdispls; + int *procs_a2a; + bigint *offsets; + char *inbuf_a2a,*outbuf_a2a; + + // create procs and inbuf for All2all if necesary + + if (!inorder) { + memory->create(procs_a2a,nprocs,"rendezvous:procs"); + inbuf_a2a = (char *) memory->smalloc((bigint) n*insize, + "rendezvous:inbuf"); + memory->create(offsets,nprocs,"rendezvous:offsets"); + + for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; + for (int i = 0; i < n; i++) procs_a2a[procs[i]]++; + + offsets[0] = 0; + for (int i = 1; i < nprocs; i++) + offsets[i] = offsets[i-1] + insize*procs_a2a[i-1]; + + bigint offset = 0; + for (int i = 0; i < n; i++) { + iproc = procs[i]; + memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize); + offsets[iproc] += insize; + offset += insize; + } + + all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize; + + } else { + procs_a2a = procs; + inbuf_a2a = inbuf; + all2all1_bytes = 0; + } + + // create args for MPI_Alltoallv() on input data + + memory->create(sendcount,nprocs,"rendezvous:sendcount"); + memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); + + memory->create(recvcount,nprocs,"rendezvous:recvcount"); + MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); + + memory->create(sdispls,nprocs,"rendezvous:sdispls"); + memory->create(rdispls,nprocs,"rendezvous:rdispls"); + sdispls[0] = rdispls[0] = 0; + for (int i = 1; i < nprocs; i++) { + sdispls[i] = sdispls[i-1] + sendcount[i-1]; + rdispls[i] = rdispls[i-1] + recvcount[i-1]; + } + int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1]; + + // test for overflow of input data due to imbalance or insize + // means that individual sdispls or rdispls values overflow + + int overflow = 0; + if ((bigint) n*insize > MAXSMALLINT) overflow = 1; + if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1; + int overflowall; + MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); + if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a"); + + for (int i = 0; i < nprocs; i++) { + sendcount[i] *= insize; + sdispls[i] *= insize; + recvcount[i] *= insize; + rdispls[i] *= insize; + } + + // all2all comm of inbuf from caller decomp to rendezvous decomp + + char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize, + "rendezvous:inbuf"); + + MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR, + inbuf_rvous,recvcount,rdispls,MPI_CHAR,world); + + if (!inorder) { + memory->destroy(procs_a2a); + memory->sfree(inbuf_a2a); + memory->destroy(offsets); + } + + // peform rendezvous computation via callback() + // callback() allocates/populates proclist_rvous and outbuf_rvous + + int flag; + int *procs_rvous; + char *outbuf_rvous; + + int nrvous_out = callback(nrvous,inbuf_rvous,flag, + procs_rvous,outbuf_rvous,ptr); + + if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous + if (flag == 0) { + memory->destroy(sendcount); + memory->destroy(recvcount); + memory->destroy(sdispls); + memory->destroy(rdispls); + if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + 4*nprocs*sizeof(int) + all2all1_bytes); + return 0; // all nout_rvous are 0, no 2nd irregular + } + + + + + + + // create procs and outbuf for All2all if necesary + + if (!outorder) { + memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs"); + + outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize, + "rendezvous:outbuf"); + memory->create(offsets,nprocs,"rendezvous:offsets"); + + for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; + for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++; + + offsets[0] = 0; + for (int i = 1; i < nprocs; i++) + offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1]; + + bigint offset = 0; + for (int i = 0; i < nrvous_out; i++) { + iproc = procs_rvous[i]; + memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize); + offsets[iproc] += outsize; + offset += outsize; + } + + all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + + nrvous_out*outsize; + + } else { + procs_a2a = procs_rvous; + outbuf_a2a = outbuf_rvous; + all2all2_bytes = 0; + } + + // comm outbuf from rendezvous decomposition back to caller + + memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); + + MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); + + sdispls[0] = rdispls[0] = 0; + for (int i = 1; i < nprocs; i++) { + sdispls[i] = sdispls[i-1] + sendcount[i-1]; + rdispls[i] = rdispls[i-1] + recvcount[i-1]; + } + int nout = rdispls[nprocs-1] + recvcount[nprocs-1]; + + // test for overflow of outbuf due to imbalance or outsize + // means that individual sdispls or rdispls values overflow + + overflow = 0; + if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1; + if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1; + MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); + if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a"); + + for (int i = 0; i < nprocs; i++) { + sendcount[i] *= outsize; + sdispls[i] *= outsize; + recvcount[i] *= outsize; + rdispls[i] *= outsize; + } + + // all2all comm of outbuf from rendezvous decomp back to caller decomp + // caller will free outbuf + + outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); + + MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR, + outbuf,recvcount,rdispls,MPI_CHAR,world); + + memory->destroy(procs_rvous); + memory->sfree(outbuf_rvous); + + if (!outorder) { + memory->destroy(procs_a2a); + memory->sfree(outbuf_a2a); + memory->destroy(offsets); + } + + // clean up + + memory->destroy(sendcount); + memory->destroy(recvcount); + memory->destroy(sdispls); + memory->destroy(rdispls); + + // return number of output datums + // last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops + + if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, + (bigint) nrvous_out*sizeof(int) + + 4*nprocs*sizeof(int) + + MAX(all2all1_bytes,all2all2_bytes)); + return nout; +} + +/* ---------------------------------------------------------------------- + print balance and memory info for rendezvous operation + useful for debugging +------------------------------------------------------------------------- */ + +void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, + int insize, int outsize, bigint commsize) +{ + bigint size_in_all,size_in_max,size_in_min; + bigint size_out_all,size_out_max,size_out_min; + bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; + bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; + bigint size_comm_all,size_comm_max,size_comm_min; + + bigint size = (bigint) n*insize; + MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nout*outsize; + MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous*insize; + MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = (bigint) nrvous_out*insize; + MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + size = commsize; + MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world); + MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world); + MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world); + + int mbytes = 1024*1024; + + if (me == 0) { + if (screen) { + fprintf(screen,"Rendezvous balance and memory info: (tot,ave,max,min) \n"); + fprintf(screen," input datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_in_all/insize,1.0*size_in_all/nprocs/insize, + size_in_max/insize,size_in_min/insize); + fprintf(screen," input data (MB): %g %g %g %g\n", + 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, + 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); + if (outsize) + fprintf(screen," output datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_out_all/outsize,1.0*size_out_all/nprocs/outsize, + size_out_max/outsize,size_out_min/outsize); + else + fprintf(screen," output datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + 0,0.0,0,0); + fprintf(screen," output data (MB): %g %g %g %g\n", + 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, + 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); + fprintf(screen," input rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, + size_inrvous_max/insize,size_inrvous_min/insize); + fprintf(screen," input rvous data (MB): %g %g %g %g\n", + 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, + 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); + if (outsize) + fprintf(screen," output rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, + size_outrvous_max/outsize,size_outrvous_min/outsize); + else + fprintf(screen," output rvous datum count: " + BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n", + 0,0.0,0,0); + fprintf(screen," output rvous data (MB): %g %g %g %g\n", + 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, + 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); + fprintf(screen," rvous comm (MB): %g %g %g %g\n", + 1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes, + 1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes); + } + } +} + /* ---------------------------------------------------------------------- proc 0 reads Nlines from file into buf and bcasts buf to all procs caller allocates buf to max size needed diff --git a/src/comm.h b/src/comm.h index 2579f9b283..9c0112b4c4 100644 --- a/src/comm.h +++ b/src/comm.h @@ -109,6 +109,10 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); + int rendezvous(int, int, char *, int, int, int *, + int (*)(int, char *, int &, int *&, char *&, void *), + int, char *&, int, void *, int statflag=0); + int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); @@ -142,6 +146,15 @@ class Comm : protected Pointers { int ncores; // # of cores per node int coregrid[3]; // 3d grid of cores within a node int user_coregrid[3]; // user request for cores in each dim + + int rendezvous_irregular(int, char *, int, int, int *, + int (*)(int, char *, int &, int *&, char *&, void *), + int, char *&, int, void *, int); + int rendezvous_all2all(int, char *, int, int, int *, + int (*)(int, char *, int &, int *&, char *&, void *), + int, char *&, int, void *, int); + void rendezvous_stats(int, int, int, int, int, int, bigint); + public: enum{MULTIPLE}; }; diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index cb0da42970..52e4256fca 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -514,9 +514,6 @@ void CreateAtoms::command(int narg, char **arg) if (domain->triclinic) domain->lamda2x(atom->nlocal); } - MPI_Barrier(world); - double time2 = MPI_Wtime(); - // clean up delete ranmol; @@ -526,21 +523,6 @@ void CreateAtoms::command(int narg, char **arg) delete [] ystr; delete [] zstr; - // print status - - if (comm->me == 0) { - if (screen) { - fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(screen," Time spent = %g secs\n",time2-time1); - } - if (logfile) { - fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(logfile," Time spent = %g secs\n",time2-time1); - } - } - // for MOLECULE mode: // create special bond lists for molecular systems, // but not for atom style template @@ -550,6 +532,25 @@ void CreateAtoms::command(int narg, char **arg) if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) { Special special(lmp); special.build(); + + } + } + + // print status + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) { + fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(screen," create_atoms CPU = %g secs\n",time2-time1); + } + if (logfile) { + fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1); } } } diff --git a/src/hashlittle.cpp b/src/hashlittle.cpp new file mode 100644 index 0000000000..f1d4e61142 --- /dev/null +++ b/src/hashlittle.cpp @@ -0,0 +1,349 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +#include +#include +#include + +// if the system defines the __BYTE_ORDER__ define, +// we use it instead of guessing the platform + +#if defined(__BYTE_ORDER__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define HASH_LITTLE_ENDIAN 1 +# else +# define HASH_LITTLE_ENDIAN 0 +# endif +#else // heuristic platform guess +# if defined(__bg__) +# define HASH_LITTLE_ENDIAN 0 // IBM BlueGene is big endian +# else +# define HASH_LITTLE_ENDIAN 1 // Intel and AMD x86 are little endian +# endif +#endif + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; + +#else /* PURIFY_HATES_HASHLITTLE */ +/* I don't know what it is about Jenkins' hashlittle function, but + * it drives purify insane, even with VALGRIND defined. It makes + * purify unusable!! The code execution doesn't even make sense. + * Below is a (probably) weaker hash function that at least allows + * testing with purify. + */ +#define MAXINT_DIV_PHI 11400714819323198485U + + uint32_t h, rest, *p, bytes, num_bytes; + char *byteptr; + + num_bytes = length; + + /* First hash the uint32_t-sized portions of the key */ + h = 0; + for (p = (uint32_t *)key, bytes=num_bytes; + bytes >= (uint32_t) sizeof(uint32_t); + bytes-=sizeof(uint32_t), p++){ + h = (h^(*p))*MAXINT_DIV_PHI; + } + + /* Then take care of the remaining bytes, if any */ + rest = 0; + for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){ + rest = (rest<<8) | (*byteptr); + } + + /* If extra bytes, merge the two parts */ + if (rest) + h = (h^rest)*MAXINT_DIV_PHI; + + return h; +#endif /* PURIFY_HATES_HASHLITTLE */ +} diff --git a/src/hashlittle.h b/src/hashlittle.h new file mode 100644 index 0000000000..7b57a35c80 --- /dev/null +++ b/src/hashlittle.h @@ -0,0 +1,5 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +uint32_t hashlittle(const void *key, size_t length, uint32_t); diff --git a/src/irregular.cpp b/src/irregular.cpp index 9c15f135d0..c27a8c8e18 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -501,7 +501,8 @@ int compare_standalone(const int i, const int j, void *ptr) void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf) { - int i,m,n,offset,count; + int i,m,n,count; + bigint offset; // post all receives @@ -621,6 +622,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag) num_send = new int[nsend_proc]; index_send = new int[n-work1[me]]; index_self = new int[work1[me]]; + maxindex = n; // proc_send = procs I send to // num_send = # of datums I send to each proc @@ -678,8 +680,182 @@ int Irregular::create_data(int n, int *proclist, int sortflag) // receive incoming messages // proc_recv = procs I recv from - // num_recv = total size of message each proc sends me - // nrecvdatum = total size of data I recv + // num_recv = # of datums each proc sends me + // nrecvdatum = total # of datums I recv + + int nrecvdatum = 0; + for (i = 0; i < nrecv_proc; i++) { + MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status); + proc_recv[i] = status->MPI_SOURCE; + nrecvdatum += num_recv[i]; + } + nrecvdatum += num_self; + + // sort proc_recv and num_recv by proc ID if requested + // useful for debugging to insure reproducible ordering of received datums + + if (sortflag) { + int *order = new int[nrecv_proc]; + int *proc_recv_ordered = new int[nrecv_proc]; + int *num_recv_ordered = new int[nrecv_proc]; + + for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_QSORT) + proc_recv_copy = proc_recv; + qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif + + int j; + for (i = 0; i < nrecv_proc; i++) { + j = order[i]; + proc_recv_ordered[i] = proc_recv[j]; + num_recv_ordered[i] = num_recv[j]; + } + + memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int)); + memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int)); + delete [] order; + delete [] proc_recv_ordered; + delete [] num_recv_ordered; + } + + // barrier to insure all MPI_ANY_SOURCE messages are received + // else another proc could proceed to exchange_data() and send to me + + MPI_Barrier(world); + + // return # of datums I will receive + + return nrecvdatum; +} + +/* ---------------------------------------------------------------------- + create communication plan based on list of datums of uniform size + n = # of datums to send + procs = how many datums to send to each proc, must include self + sort = flag for sorting order of received messages by proc ID + return total # of datums I will recv, including any to self +------------------------------------------------------------------------- */ + +int Irregular::create_data_grouped(int n, int *procs, int sortflag) +{ + int i,j,k,m; + + // setup for collective comm + // work1 = # of datums I send to each proc, set self to 0 + // work2 = 1 for all procs, used for ReduceScatter + + for (i = 0; i < nprocs; i++) { + work1[i] = procs[i]; + work2[i] = 1; + } + work1[me] = 0; + + // nrecv_proc = # of procs I receive messages from, not including self + // options for performing ReduceScatter operation + // some are more efficient on some machines at big sizes + +#ifdef LAMMPS_RS_ALLREDUCE_INPLACE + MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work1[me]; +#else +#ifdef LAMMPS_RS_ALLREDUCE + MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work2[me]; +#else + MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); +#endif +#endif + + // allocate receive arrays + + proc_recv = new int[nrecv_proc]; + num_recv = new int[nrecv_proc]; + request = new MPI_Request[nrecv_proc]; + status = new MPI_Status[nrecv_proc]; + + // work1 = # of datums I send to each proc, including self + // nsend_proc = # of procs I send messages to, not including self + + for (i = 0; i < nprocs; i++) work1[i] = procs[i]; + + nsend_proc = 0; + for (i = 0; i < nprocs; i++) + if (work1[i]) nsend_proc++; + if (work1[me]) nsend_proc--; + + // allocate send and self arrays + + proc_send = new int[nsend_proc]; + num_send = new int[nsend_proc]; + index_send = new int[n-work1[me]]; + index_self = new int[work1[me]]; + maxindex = n; + + // proc_send = procs I send to + // num_send = # of datums I send to each proc + // num_self = # of datums I copy to self + // to balance pattern of send messages: + // each proc begins with iproc > me, continues until iproc = me + // reset work1 to store which send message each proc corresponds to + + int iproc = me; + int isend = 0; + for (i = 0; i < nprocs; i++) { + iproc++; + if (iproc == nprocs) iproc = 0; + if (iproc == me) { + num_self = work1[iproc]; + work1[iproc] = 0; + } else if (work1[iproc] > 0) { + proc_send[isend] = iproc; + num_send[isend] = work1[iproc]; + work1[iproc] = isend; + isend++; + } + } + + // work2 = offsets into index_send for each proc I send to + // m = ptr into index_self + // index_send = list of which datums to send to each proc + // 1st N1 values are datum indices for 1st proc, + // next N2 values are datum indices for 2nd proc, etc + // index_self = list of which datums to copy to self + + work2[0] = 0; + for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1]; + + m = 0; + i = 0; + for (iproc = 0; iproc < nprocs; iproc++) { + k = procs[iproc]; + for (j = 0; j < k; j++) { + if (iproc == me) index_self[m++] = i++; + else { + isend = work1[iproc]; + index_send[work2[isend]++] = i++; + } + } + } + + // tell receivers how much data I send + // sendmax_proc = largest # of datums I send in a single message + + sendmax_proc = 0; + for (i = 0; i < nsend_proc; i++) { + MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock + MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq); + MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion + sendmax_proc = MAX(sendmax_proc,num_send[i]); + } + + // receive incoming messages + // proc_recv = procs I recv from + // num_recv = # of datums each proc sends me + // nrecvdatum = total # of datums I recv int nrecvdatum = 0; for (i = 0; i < nrecv_proc; i++) { @@ -739,11 +915,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) { - int i,m,n,offset,count; + int i,n,count; + bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB + char *dest; // post all receives, starting after self copies - offset = num_self*nbytes; + bigint offset = num_self*nbytes; for (int irecv = 0; irecv < nrecv_proc; irecv++) { MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR, proc_recv[irecv],0,world,&request[irecv]); @@ -765,23 +943,34 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) n = 0; for (int isend = 0; isend < nsend_proc; isend++) { count = num_send[isend]; + dest = buf; for (i = 0; i < count; i++) { m = index_send[n++]; - memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world); } // copy datums to self, put at beginning of recvbuf + dest = recvbuf; for (i = 0; i < num_self; i++) { m = index_self[i]; - memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } // wait on all incoming messages if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); + + // approximate memory tally + // DEBUG lines + + //bigint irregular_bytes = 2*nprocs*sizeof(int); + //irregular_bytes += maxindex*sizeof(int); + //irregular_bytes += maxbuf; } /* ---------------------------------------------------------------------- diff --git a/src/irregular.h b/src/irregular.h index 1f74fe801b..d56bcb253d 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -33,6 +33,7 @@ class Irregular : protected Pointers { int *procassign = NULL); int migrate_check(); int create_data(int, int *, int sortflag = 0); + int create_data_grouped(int, int *, int sortflag = 0); void exchange_data(char *, int, char *); void destroy_data(); bigint memory_usage(); @@ -48,6 +49,7 @@ class Irregular : protected Pointers { double *dbuf; // double buf for largest single atom send int maxbuf; // size of char buf in bytes char *buf; // char buf for largest single data send + int maxindex; // combined size of index_send + index_self int *mproclist,*msizes; // persistent vectors in migrate_atoms int maxlocal; // allocated size of mproclist and msizes diff --git a/src/read_data.cpp b/src/read_data.cpp index 2f4484010b..e2efbaaefa 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -120,6 +120,9 @@ void ReadData::command(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal read_data command"); + MPI_Barrier(world); + double time1 = MPI_Wtime(); + // optional args addflag = NONE; @@ -906,6 +909,18 @@ void ReadData::command(int narg, char **arg) force->kspace = saved_kspace; } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_data CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_data CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 73dc37f4cb..5aa4622a67 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -81,6 +81,9 @@ void ReadRestart::command(int narg, char **arg) if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); + MPI_Barrier(world); + double time1 = MPI_Wtime(); + MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -562,6 +565,18 @@ void ReadRestart::command(int narg, char **arg) Special special(lmp); special.build(); } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_restart CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_restart CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/replicate.cpp b/src/replicate.cpp index cdadf1fd1f..3c8f4a8aee 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg) if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); - // Record wall time for atom replication + // record wall time for atom replication MPI_Barrier(world); double time1 = MPI_Wtime(); @@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg) special.build(); } - // Wall time + // total time MPI_Barrier(world); double time2 = MPI_Wtime(); if (me == 0) { if (screen) - fprintf(screen," Time spent = %g secs\n",time2-time1); + fprintf(screen," replicate CPU = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," Time spent = %g secs\n",time2-time1); + fprintf(logfile," replicate CPU = %g secs\n",time2-time1); } } diff --git a/src/special.cpp b/src/special.cpp index fccc930353..b9287df472 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -21,12 +21,14 @@ #include "modify.h" #include "fix.h" #include "accelerator_kokkos.h" +#include "atom_masks.h" #include "memory.h" #include "error.h" -#include "atom_masks.h" using namespace LAMMPS_NS; +#define RVOUS 1 // 0 for irregular, 1 for all2all + /* ---------------------------------------------------------------------- */ Special::Special(LAMMPS *lmp) : Pointers(lmp) @@ -54,18 +56,8 @@ Special::~Special() void Special::build() { - int i,j,k,size; - int max,maxall,nbuf; - tagint *buf; - MPI_Barrier(world); - - int nlocal = atom->nlocal; - - tagint *tag = atom->tag; - int *num_bond = atom->num_bond; - tagint **bond_atom = atom->bond_atom; - int **nspecial = atom->nspecial; + double time1 = MPI_Wtime(); if (me == 0 && screen) { const double * const special_lj = force->special_lj; @@ -79,224 +71,57 @@ void Special::build() // initialize nspecial counters to 0 - for (i = 0; i < nlocal; i++) { + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { nspecial[i][0] = 0; nspecial[i][1] = 0; nspecial[i][2] = 0; } - // ----------------------------------------------------- - // compute nspecial[i][0] = # of 1-2 neighbors of atom i - // ----------------------------------------------------- + // setup atomIDs and procowner vectors in rendezvous decomposition - // bond partners stored by atom itself + atom_owners(); - for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i]; - - // if newton_bond off, then done - // else only counted 1/2 of all bonds, so count other half - - if (force->newton_bond) { - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = global tag of 2nd atom in each bond - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += num_bond[i]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with global tags of bond partners of my atoms - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - buf[size++] = bond_atom[i][j]; - - // cycle buffer around ring of procs back to self - // when receive buffer, scan tags for atoms I own - // when find one, increment nspecial count for that atom - - comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL,(void *)this); - - memory->destroy(buf); - } - - // ---------------------------------------------------- + // tally nspecial[i][0] = # of 1-2 neighbors of atom i // create onetwo[i] = list of 1-2 neighbors for atom i - // ---------------------------------------------------- - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]); + if (force->newton_bond) onetwo_build_newton(); + else onetwo_build_newton_off(); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + // print max # of 1-2 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-2 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-2 neighbors\n",maxall); } - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - // count = accumulating counter - - memory->create(count,nlocal,"special:count"); - for (i = 0; i < nlocal; i++) count[i] = 0; - - // add bond partners stored by atom to onetwo list - - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - onetwo[i][count[i]++] = bond_atom[i][j]; - - // if newton_bond off, then done - // else only stored 1/2 of all bonds, so store other half - - if (force->newton_bond) { - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 global tags in each bond - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with global tags of both atoms in bond - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) { - buf[size++] = tag[i]; - buf[size++] = bond_atom[i][j]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan 2nd-atom tags for atoms I own - // when find one, add 1st-atom tag to onetwo list for 2nd atom - - comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL,(void *)this); - - memory->destroy(buf); - } - - memory->destroy(count); - - // ----------------------------------------------------- // done if special_bond weights for 1-3, 1-4 are set to 1.0 - // ----------------------------------------------------- if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0 && force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { dedup(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); + timer_output(time1); return; } - // ----------------------------------------------------- - // compute nspecial[i][1] = # of 1-3 neighbors of atom i - // ----------------------------------------------------- - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-2 neighbors - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = counter for 1-3 neighbors, initialized to 0 - // (2) = # of 1-2 neighbors - // (3:N) = list of 1-2 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][0]; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - // subtracting one since my list will contain original atom - - comm->ring(size,sizeof(tagint),buf,3,ring_three,buf,(void *)this); - - // extract count from buffer that has cycled back to me - // nspecial[i][1] = # of 1-3 neighbors of atom i - - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][1] = buf[j]; - j += 2 + nspecial[i][0]; - } - - memory->destroy(buf); - - // ---------------------------------------------------- + // tally nspecial[i][1] = # of 1-3 neighbors of atom i // create onethree[i] = list of 1-3 neighbors for atom i - // ---------------------------------------------------- - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + onethree_build(); + + // print max # of 1-3 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-3 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-3 neighbors\n",maxall); } - memory->create(onethree,nlocal,maxall,"special:onethree"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = global tag of original atom - // (2) = # of 1-2 neighbors - // (3) = # of 1-3 neighbors - // (4) = counter for 1-3 neighbors, initialized to 0 - // (5:N) = list of 1-2 neighbors - // (N+1:2N) space for list of 1-3 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = tag[i]; - buf[size++] = nspecial[i][0]; - buf[size++] = nspecial[i][1]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; - size += nspecial[i][1]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, add its neighbors to 1-3 list - // increment the count in buf(i+4) - // exclude the atom whose tag = original - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,4,ring_four,buf,(void *)this); - - // fill onethree with buffer values that have been returned to me - // sanity check: accumulated buf[i+3] count should equal - // nspecial[i][1] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+3] != nspecial[i][1]) - error->one(FLERR,"1-3 bond count is inconsistent"); - j += 4 + nspecial[i][0]; - for (k = 0; k < nspecial[i][1]; k++) - onethree[i][k] = buf[j++]; - } - - memory->destroy(buf); - // done if special_bond weights for 1-4 are set to 1.0 if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { @@ -304,117 +129,410 @@ void Special::build() if (force->special_angle) angle_trim(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); + timer_output(time1); return; } - // ----------------------------------------------------- - // compute nspecial[i][2] = # of 1-4 neighbors of atom i - // ----------------------------------------------------- - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-3 neighbors - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = counter for 1-4 neighbors, initialized to 0 - // (2) = # of 1-3 neighbors - // (3:N) = list of 1-3 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][1]; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, increment 1-4 count by # of 1-2 neighbors of my atom - // may include duplicates and original atom but they will be culled later - - comm->ring(size,sizeof(tagint),buf,5,ring_five,buf,(void *)this); - - // extract count from buffer that has cycled back to me - // nspecial[i][2] = # of 1-4 neighbors of atom i - - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][2] = buf[j]; - j += 2 + nspecial[i][1]; - } - - memory->destroy(buf); - - // ---------------------------------------------------- + // tally nspecial[i][2] = # of 1-4 neighbors of atom i // create onefour[i] = list of 1-4 neighbors for atom i - // ---------------------------------------------------- - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + onefour_build(); + + // print max # of 1-4 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-4 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-4 neighbors\n",maxall); } - memory->create(onefour,nlocal,maxall,"special:onefour"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) - nbuf += 3 + nspecial[i][1] + nspecial[i][2]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = # of 1-3 neighbors - // (2) = # of 1-4 neighbors - // (3) = counter for 1-4 neighbors, initialized to 0 - // (4:N) = list of 1-3 neighbors - // (N+1:2N) space for list of 1-4 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = nspecial[i][1]; - buf[size++] = nspecial[i][2]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; - size += nspecial[i][2]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, add its neighbors to 1-4 list - // incrementing the count in buf(i+4) - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,6,ring_six,buf,(void *)this); - - // fill onefour with buffer values that have been returned to me - // sanity check: accumulated buf[i+2] count should equal - // nspecial[i][2] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+2] != nspecial[i][2]) - error->one(FLERR,"1-4 bond count is inconsistent"); - j += 3 + nspecial[i][1]; - for (k = 0; k < nspecial[i][2]; k++) - onefour[i][k] = buf[j++]; - } - - memory->destroy(buf); + // finish processing the onetwo, onethree, onefour lists dedup(); if (force->special_angle) angle_trim(); if (force->special_dihedral) dihedral_trim(); combine(); fix_alteration(); + memory->destroy(procowner); + memory->destroy(atomIDs); + + timer_output(time1); +} + +/* ---------------------------------------------------------------------- + setup atomIDs and procowner +------------------------------------------------------------------------- */ + +void Special::atom_owners() +{ + tagint *tag = atom->tag; + int nlocal = atom->nlocal; + + int *proclist; + memory->create(proclist,nlocal,"special:proclist"); + IDRvous *idbuf = (IDRvous *) + memory->smalloc((bigint) nlocal*sizeof(IDRvous),"special:idbuf"); + + // setup input buf for rendezvous comm + // one datum for each owned atom: datum = owning proc, atomID + // each proc assigned every 1/Pth atom + + for (int i = 0; i < nlocal; i++) { + proclist[i] = tag[i] % nprocs; + idbuf[i].me = me; + idbuf[i].atomID = tag[i]; + } + + // perform rendezvous operation + + char *buf; + comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist, + rendezvous_ids,0,buf,0,(void *) this); + + memory->destroy(proclist); + memory->sfree(idbuf); +} + +/* ---------------------------------------------------------------------- + onetwo build when newton_bond flag on + uses rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onetwo_build_newton() +{ + int i,j,m; + + tagint *tag = atom->tag; + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // one datum for each unowned bond partner: bond partner ID, atomID + // owning proc for each datum = bond partner ID % nprocs + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) continue; + proclist[nsend] = bond_atom[i][j] % nprocs; + inbuf[nsend].atomID = bond_atom[i][j]; + inbuf[nsend].partnerID = tag[i]; + nsend++; + } + } + + // perform rendezvous operation + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[0] and onetwo for all owned atoms + // based on owned info plus rendezvous output info + // output datums = pairs of atoms that are 1-2 neighbors + + for (i = 0; i < nlocal; i++) { + nspecial[i][0] = num_bond[i]; + for (j = 0; j < num_bond[i]; j++) { + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][0]++; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][0]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][0]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_bond[i]; j++) { + onetwo[i][nspecial[i][0]++] = bond_atom[i][j]; + m = atom->map(bond_atom[i][j]); + if (m >= 0 && m < nlocal) onetwo[m][nspecial[m][0]++] = tag[i]; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + onetwo build with newton_bond flag off + no need for rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onetwo_build_newton_off() +{ + int i,j; + + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,num_bond[i]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + // nsend = # of my datums to send + // include nlocal datums with owner of each atom + + for (i = 0; i < nlocal; i++) { + nspecial[i][0] = num_bond[i]; + for (j = 0; j < num_bond[i]; j++) + onetwo[i][j] = bond_atom[i][j]; + } +} + +/* ---------------------------------------------------------------------- + onethree build + uses rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onethree_build() +{ + int i,j,k,m,proc; + + tagint *tag = atom->tag; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m < 0 || m >= nlocal) nsend += nspecial[i][0]-1; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // datums = pairs of onetwo partners where either is unknown + // these pairs are onethree neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onetwo ID % nprocs + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m >= 0 && m < nlocal) continue; + proc = onetwo[i][j] % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + proclist[nsend] = proc; + inbuf[nsend].atomID = onetwo[i][j]; + inbuf[nsend].partnerID = onetwo[i][k]; + nsend++; + } + } + } + + // perform rendezvous operation + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[1] and onethree for all owned atoms + // based on owned info plus rendezvous output info + // output datums = pairs of atoms that are 1-3 neighbors + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][1] += nspecial[i][0]-1; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][1]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][1]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onethree,nlocal,maxall,"special:onethree"); + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][0]; j++) { + m = atom->map(onetwo[i][j]); + if (m < 0 || m >= nlocal) continue; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + onethree[m][nspecial[m][1]++] = onetwo[i][k]; + } + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); +} + +/* ---------------------------------------------------------------------- + onefour build + uses rendezvous comm +------------------------------------------------------------------------- */ + +void Special::onefour_build() +{ + int i,j,k,m,proc; + + tagint *tag = atom->tag; + int **nspecial = atom->nspecial; + int nlocal = atom->nlocal; + + // nsend = # of my datums to send + + int nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m < 0 || m >= nlocal) nsend += nspecial[i][0]; + } + } + + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // datums = pairs of onethree and onetwo partners where onethree is unknown + // these pairs are onefour neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onethree ID % nprocs + + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m >= 0 && m < nlocal) continue; + proc = onethree[i][j] % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + proclist[nsend] = proc; + inbuf[nsend].atomID = onethree[i][j]; + inbuf[nsend].partnerID = onetwo[i][k]; + nsend++; + } + } + } + + // perform rendezvous operation + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[2] and onefour for all owned atoms + // based on owned info plus rendezvous output info + // output datums = pairs of atoms that are 1-4 neighbors + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m >= 0 && m < nlocal) nspecial[m][2] += nspecial[i][0]; + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + nspecial[i][2]++; + } + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][2]); + + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onefour,nlocal,maxall,"special:onefour"); + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < nspecial[i][1]; j++) { + m = atom->map(onethree[i][j]); + if (m < 0 || m >= nlocal) continue; + for (k = 0; k < nspecial[i][0]; k++) { + onefour[m][nspecial[m][2]++] = onetwo[i][k]; + } + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); } /* ---------------------------------------------------------------------- @@ -659,21 +777,24 @@ void Special::combine() trim list of 1-3 neighbors by checking defined angles delete a 1-3 neigh if they are not end atoms of a defined angle and if they are not 1,3 or 2,4 atoms of a defined dihedral + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::angle_trim() { - int i,j,m,n; + int i,j,k,m;; int *num_angle = atom->num_angle; int *num_dihedral = atom->num_dihedral; tagint **angle_atom1 = atom->angle_atom1; + tagint **angle_atom2 = atom->angle_atom2; tagint **angle_atom3 = atom->angle_atom3; tagint **dihedral_atom1 = atom->dihedral_atom1; tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom3 = atom->dihedral_atom3; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-3 neighbor counts @@ -692,73 +813,206 @@ void Special::angle_trim() " %g = # of 1-3 neighbors before angle trim\n",allcount); } - // if angles or dihedrals are defined, - // flag each 1-3 neigh if it appears in an angle or dihedral + // if angles or dihedrals are defined + // rendezvous angle 1-3 and dihedral 1-3,2-4 pairs if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) { - // dflag = flag for 1-3 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // nsend = # of my datums to send + // latter is only for angles or dihedrlas where I own atom2 (newton bond off) + int nsend = 0; for (i = 0; i < nlocal; i++) { - n = nspecial[i][1]; - for (j = 0; j < n; j++) dflag[i][j] = 0; + for (j = 0; j < num_angle[i]; j++) { + if (tag[i] != angle_atom2[i][j]) continue; + m = atom->map(angle_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(angle_atom3[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom3[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) nsend++; + } } - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,3 atoms in each angle stored by atom - // and list of 1,3 and 2,4 atoms in each dihedral stored by atom + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); - int nbuf = 0; + // setup input buf to rendezvous comm + // datums = pairs of onetwo partners where either is unknown + // these pairs are onethree neighbors + // datum = onetwo ID, onetwo ID + // owning proc for each datum = onetwo ID % nprocs + + nsend = 0; for (i = 0; i < nlocal; i++) { - if (num_angle && atom->nangles) nbuf += 2*num_angle[i]; - if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i]; + for (j = 0; j < num_angle[i]; j++) { + if (tag[i] != angle_atom2[i][j]) continue; + + m = atom->map(angle_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = angle_atom1[i][j] % nprocs; + inbuf[nsend].atomID = angle_atom1[i][j]; + inbuf[nsend].partnerID = angle_atom3[i][j]; + nsend++; + } + + m = atom->map(angle_atom3[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = angle_atom3[i][j] % nprocs; + inbuf[nsend].atomID = angle_atom3[i][j]; + inbuf[nsend].partnerID = angle_atom1[i][j]; + nsend++; + } + } + + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom1[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom1[i][j]; + inbuf[nsend].partnerID = dihedral_atom3[i][j]; + nsend++; + } + + m = atom->map(dihedral_atom3[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom3[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom3[i][j]; + inbuf[nsend].partnerID = dihedral_atom1[i][j]; + nsend++; + } + + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom4[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom4[i][j]; + inbuf[nsend].partnerID = dihedral_atom2[i][j]; + nsend++; + } + } } - int *buf; - memory->create(buf,nbuf,"special:buf"); - // fill buffer with list of 1,3 atoms in each angle - // and with list of 1,3 and 2,4 atoms in each dihedral + // perform rendezvous operation + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; - int size = 0; - if (num_angle && atom->nangles) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_angle[i]; j++) { - buf[size++] = angle_atom1[i][j]; - buf[size++] = angle_atom3[i][j]; - } + memory->destroy(proclist); + memory->sfree(inbuf); - if (num_dihedral && atom->ndihedrals) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom3[i][j]; - buf[size++] = dihedral_atom2[i][j]; - buf[size++] = dihedral_atom4[i][j]; - } + // flag all onethree atoms to keep - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,3 atoms looking for atoms I own - // when find one, scan its 1-3 neigh list and mark I,J as in an angle + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][1]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + + int **flag; + memory->create(flag,nlocal,maxall,"special:flag"); - comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL,(void *)this); - - // delete 1-3 neighbors if they are not flagged in dflag - - for (i = 0; i < nlocal; i++) { - m = 0; + for (i = 0; i < nlocal; i++) for (j = 0; j < nspecial[i][1]; j++) - if (dflag[i][j]) onethree[i][m++] = onethree[i][j]; - nspecial[i][1] = m; + flag[i][j] = 0; + + // reset nspecial[1] and onethree for all owned atoms based on output info + // based on owned info plus rendezvous output info + // output datums = pairs of atoms that are 1-3 neighbors + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_angle[i]; j++) { + if (tag[i] != angle_atom2[i][j]) continue; + + m = atom->map(angle_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == angle_atom3[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(angle_atom3[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == angle_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + } + + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + + m = atom->map(dihedral_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom3[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom3[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom4[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][1]; k++) + if (onethree[m][k] == dihedral_atom2[i][j]) { + flag[m][k] = 1; + break; + } + } + } } - // clean up + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (k = 0; k < nspecial[i][1]; k++) + if (onethree[i][k] == outbuf[m].partnerID) { + flag[i][k] = 1; + break; + } + } + + memory->destroy(outbuf); - memory->destroy(dflag); - memory->destroy(buf); + // use flag values to compress onefour list for each atom + + for (i = 0; i < nlocal; i++) { + j = 0; + while (j < nspecial[i][1]) { + if (flag[i][j] == 0) { + onethree[i][j] = onethree[i][nspecial[i][1]-1]; + flag[i][j] = flag[i][nspecial[i][1]-1]; + nspecial[i][1]--; + } else j++; + } + } + + memory->destroy(flag); // if no angles or dihedrals are defined, delete all 1-3 neighs @@ -783,18 +1037,21 @@ void Special::angle_trim() } /* ---------------------------------------------------------------------- - trim list of 1-4 neighbors by checking defined dihedrals + trim list of 1-4 neighbors by checking all defined dihedrals delete a 1-4 neigh if they are not end atoms of a defined dihedral + uses rendezvous comm ------------------------------------------------------------------------- */ void Special::dihedral_trim() { - int i,j,m,n; + int i,j,k,m; int *num_dihedral = atom->num_dihedral; tagint **dihedral_atom1 = atom->dihedral_atom1; + tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-4 neighbor counts @@ -813,58 +1070,134 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral + // if dihedrals are defined, rendezvous the dihedral 1-4 pairs if (num_dihedral && atom->ndihedrals) { - // dflag = flag for 1-4 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // nsend = # of my datums to send + int nsend = 0; for (i = 0; i < nlocal; i++) { - n = nspecial[i][2]; - for (j = 0; j < n; j++) dflag[i][j] = 0; - } - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,4 atoms in each dihedral stored by atom - - int nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i]; - int *buf; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with list of 1,4 atoms in each dihedral - - int size = 0; - for (i = 0; i < nlocal; i++) for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom4[i][j]; + if (tag[i] != dihedral_atom2[i][j]) continue; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) nsend++; + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) nsend++; } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,4 atoms looking for atoms I own - // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral - - comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL,(void *)this); - - // delete 1-4 neighbors if they are not flagged in dflag - - for (i = 0; i < nlocal; i++) { - m = 0; - for (j = 0; j < nspecial[i][2]; j++) - if (dflag[i][j]) onefour[i][m++] = onefour[i][j]; - nspecial[i][2] = m; } - // clean up + int *proclist; + memory->create(proclist,nsend,"special:proclist"); + PairRvous *inbuf = (PairRvous *) + memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf"); + + // setup input buf to rendezvous comm + // datums = pairs of onefour atom IDs in a dihedral defined for my atoms + // only dihedrals where I own atom2 (in case newton_bond off) + // datum = atom1 ID and atom4 ID + // send the datum twice, to owner of atom1 ID and atom4 ID + // owning procs for each datum = atom1 or atom4 ID % nprocs - memory->destroy(dflag); - memory->destroy(buf); + nsend = 0; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + m = atom->map(dihedral_atom1[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom1[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom1[i][j]; + inbuf[nsend].partnerID = dihedral_atom4[i][j]; + nsend++; + } + + m = atom->map(dihedral_atom4[i][j]); + if (m < 0 || m >= nlocal) { + proclist[nsend] = dihedral_atom4[i][j] % nprocs; + inbuf[nsend].atomID = dihedral_atom4[i][j]; + inbuf[nsend].partnerID = dihedral_atom1[i][j]; + nsend++; + } + } + } + + // perform rendezvous operation + + char *buf; + int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PairRvous), + 0,proclist, + rendezvous_pairs,0,buf,sizeof(PairRvous), + (void *) this); + PairRvous *outbuf = (PairRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); + + // flag all of my onefour IDs to keep + + int max = 0; + for (i = 0; i < nlocal; i++) + max = MAX(max,nspecial[i][2]); + MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + + int **flag; + memory->create(flag,nlocal,maxall,"special:flag"); + + for (i = 0; i < nlocal; i++) + for (j = 0; j < nspecial[i][2]; j++) + flag[i][j] = 0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < num_dihedral[i]; j++) { + if (tag[i] != dihedral_atom2[i][j]) continue; + + m = atom->map(dihedral_atom1[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][2]; k++) + if (onefour[m][k] == dihedral_atom4[i][j]) { + flag[m][k] = 1; + break; + } + } + + m = atom->map(dihedral_atom4[i][j]); + if (m >= 0 && m < nlocal) { + for (k = 0; k < nspecial[m][2]; k++) + if (onefour[m][k] == dihedral_atom1[i][j]) { + flag[m][k] = 1; + break; + } + } + } + } + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + for (k = 0; k < nspecial[i][2]; k++) + if (onefour[i][k] == outbuf[m].partnerID) { + flag[i][k] = 1; + break; + } + } + + memory->destroy(outbuf); + + // use flag values to compress onefour list for each atom + + for (i = 0; i < nlocal; i++) { + j = 0; + while (j < nspecial[i][2]) { + if (flag[i][j] == 0) { + onefour[i][j] = onefour[i][nspecial[i][2]-1]; + flag[i][j] = flag[i][nspecial[i][2]-1]; + nspecial[i][2]--; + } else j++; + } + } + + memory->destroy(flag); + // if no dihedrals are defined, delete all 1-4 neighs } else { @@ -888,262 +1221,95 @@ void Special::dihedral_trim() } /* ---------------------------------------------------------------------- - when receive buffer, scan tags for atoms I own - when find one, increment nspecial count for that atom + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N IDRvous datums + no outbuf ------------------------------------------------------------------------- */ -void Special::ring_one(int ndatum, char *cbuf, void *ptr) +int Special::rendezvous_ids(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) +{ + Special *sptr = (Special *) ptr; + Memory *memory = sptr->memory; + + int *procowner; + tagint *atomIDs; + + memory->create(procowner,n,"special:procowner"); + memory->create(atomIDs,n,"special:atomIDs"); + + IDRvous *in = (IDRvous *) inbuf; + + for (int i = 0; i < n; i++) { + procowner[i] = in[i].me; + atomIDs[i] = in[i].atomID; + } + + // store rendezvous data in Special class + + sptr->nrvous = n; + sptr->procowner = procowner; + sptr->atomIDs = atomIDs; + + // flag = 0: no second comm needed in rendezvous + + flag = 0; + return 0; +} + + +/* ---------------------------------------------------------------------- + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N PairRvous datums + outbuf = same list of N PairRvous datums, routed to different procs +------------------------------------------------------------------------- */ + +int Special::rendezvous_pairs(int n, char *inbuf, + int &flag, int *&proclist, char *&outbuf, + void *ptr) { Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + Memory *memory = sptr->memory; + + // clear atom map so it can be used here as a hash table + // faster than an STL map for large atom counts + + atom->map_clear(); + + // hash atom IDs stored in rendezvous decomposition + + int nrvous = sptr->nrvous; + tagint *atomIDs = sptr->atomIDs; + + for (int i = 0; i < nrvous; i++) + atom->map_one(atomIDs[i],i); + + // proclist = owner of atomID in caller decomposition + + PairRvous *in = (PairRvous *) inbuf; + int *procowner = sptr->procowner; + memory->create(proclist,n,"special:proclist"); - tagint *buf = (tagint *) cbuf; int m; - - for (int i = 0; i < ndatum; i++) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) nspecial[m][0]++; + for (int i = 0; i < n; i++) { + m = atom->map(in[i].atomID); + proclist[i] = procowner[m]; } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan 2nd-atom tags for atoms I own - when find one, add 1st-atom tag to onetwo list for 2nd atom -------------------------------------------------------------------------- */ + outbuf = inbuf; + + // re-create atom map -void Special::ring_two(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int nlocal = atom->nlocal; + atom->map_init(0); + atom->nghost = 0; + atom->map_set(); - tagint **onetwo = sptr->onetwo; - int *count = sptr->count; - - tagint *buf = (tagint *) cbuf; - int m; - - for (int i = 1; i < ndatum; i += 2) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1]; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - subtracting one since my list will contain original atom -------------------------------------------------------------------------- */ - -void Special::ring_three(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num12; - - i = 0; - while (i < ndatum) { - n = buf[i]; - num12 = buf[i+1]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) - n += nspecial[m][0] - 1; - } - buf[i] = n; - i += 2 + num12; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, add its neighbors to 1-3 list - increment the count in buf(i+4) - exclude the atom whose tag = original - this process may include duplicates but they will be culled later -------------------------------------------------------------------------- */ - -void Special::ring_four(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onetwo = sptr->onetwo; - - tagint *buf = (tagint *) cbuf; - tagint original; - int i,j,k,m,n,num12,num13; - - i = 0; - while (i < ndatum) { - original = buf[i]; - num12 = buf[i+1]; - num13 = buf[i+2]; - n = buf[i+3]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+4+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - if (onetwo[m][k] != original) - buf[i+4+num12+(n++)] = onetwo[m][k]; - } - buf[i+3] = n; - i += 4 + num12 + num13; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, increment 1-4 count by # of 1-2 neighbors of my atom - may include duplicates and original atom but they will be culled later -------------------------------------------------------------------------- */ - -void Special::ring_five(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num13; - - i = 0; - while (i < ndatum) { - n = buf[i]; - num13 = buf[i+1]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) n += nspecial[m][0]; - } - buf[i] = n; - i += 2 + num13; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, add its neighbors to 1-4 list - incrementing the count in buf(i+4) - this process may include duplicates but they will be culled later -------------------------------------------------------------------------- */ - -void Special::ring_six(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onetwo = sptr->onetwo; - - tagint *buf = (tagint *) cbuf; - int i,j,k,m,n,num13,num14; - - i = 0; - while (i < ndatum) { - num13 = buf[i]; - num14 = buf[i+1]; - n = buf[i+2]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+3+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - buf[i+3+num13+(n++)] = onetwo[m][k]; - } - buf[i+2] = n; - i += 3 + num13 + num14; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,3 atoms looking for atoms I own - when find one, scan its 1-3 neigh list and mark I,J as in an angle -------------------------------------------------------------------------- */ - -void Special::ring_seven(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onethree = sptr->onethree; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][1]; m++) - if (jglobal == onethree[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][1]; m++) - if (iglobal == onethree[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,4 atoms looking for atoms I own - when find one, scan its 1-4 neigh list and mark I,J as in a dihedral -------------------------------------------------------------------------- */ - -void Special::ring_eight(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onefour = sptr->onefour; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][2]; m++) - if (jglobal == onefour[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][2]; m++) - if (iglobal == onefour[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } + // flag = 1: outbuf = inbuf + + flag = 1; + return n; } /* ---------------------------------------------------------------------- @@ -1159,3 +1325,15 @@ void Special::fix_alteration() modify->fix[ifix]->rebuild_special(); } +/* ---------------------------------------------------------------------- + print timing output +------------------------------------------------------------------------- */ + +void Special::timer_output(double time1) +{ + double time2 = MPI_Wtime(); + if (comm->me == 0) { + if (screen) fprintf(screen," special bonds CPU = %g secs\n",time2-time1); + if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",time2-time1); + } +} diff --git a/src/special.h b/src/special.h index 9f25200336..d02a8522f6 100644 --- a/src/special.h +++ b/src/special.h @@ -26,29 +26,43 @@ class Special : protected Pointers { private: int me,nprocs; + int maxall; tagint **onetwo,**onethree,**onefour; - // data used by ring callback methods + // data used by rendezvous callback methods - int *count; - int **dflag; + int nrvous; + tagint *atomIDs; + int *procowner; + + struct IDRvous { + int me; + tagint atomID; + }; + + struct PairRvous { + tagint atomID,partnerID; + }; + + // private methods + + void atom_owners(); + void onetwo_build_newton(); + void onetwo_build_newton_off(); + void onethree_build(); + void onefour_build(); void dedup(); void angle_trim(); void dihedral_trim(); void combine(); void fix_alteration(); + void timer_output(double); - // callback functions for ring communication + // callback functions for rendezvous communication - static void ring_one(int, char *, void *); - static void ring_two(int, char *, void *); - static void ring_three(int, char *, void *); - static void ring_four(int, char *, void *); - static void ring_five(int, char *, void *); - static void ring_six(int, char *, void *); - static void ring_seven(int, char *, void *); - static void ring_eight(int, char *, void *); + static int rendezvous_ids(int, char *, int &, int *&, char *&, void *); + static int rendezvous_pairs(int, char *, int &, int *&, char *&, void *); }; }