// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "comm.h" #include "accelerator_kokkos.h" #include "atom.h" // IWYU pragma: keep #include "atom_vec.h" #include "bond.h" #include "compute.h" #include "domain.h" // IWYU pragma: keep #include "dump.h" #include "error.h" #include "fix.h" #include "force.h" #include "group.h" #include "irregular.h" #include "memory.h" // IWYU pragma: keep #include "modify.h" #include "neighbor.h" // IWYU pragma: keep #include "output.h" #include "pair.h" #include "procmap.h" #include "universe.h" #include "update.h" #include #ifdef _OPENMP #include #endif using namespace LAMMPS_NS; #define BUFEXTRA 1024 enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM}; enum{CART,CARTREORDER,XYZ}; /* ---------------------------------------------------------------------- */ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); mode = 0; bordergroup = 0; cutghostuser = 0.0; cutusermulti = nullptr; cutusermultiold = nullptr; ncollections = 0; ncollections_cutoff = 0; ghost_velocity = 0; user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0; coregrid[0] = coregrid[1] = coregrid[2] = 1; gridflag = ONELEVEL; mapflag = CART; customfile = nullptr; outfile = nullptr; recv_from_partition = send_to_partition = -1; otherflag = 0; maxexchange = maxexchange_atom = maxexchange_fix = 0; maxexchange_fix_dynamic = 0; bufextra = BUFEXTRA; grid2proc = nullptr; xsplit = ysplit = zsplit = nullptr; rcbnew = 0; multi_reduce = 0; // use of OpenMP threads // query OpenMP for number of threads/process set by user at run-time // if the OMP_NUM_THREADS environment variable is not set, we default // to using 1 thread. This follows the principle of the least surprise, // while practically all OpenMP implementations violate it by using // as many threads as there are (virtual) CPU cores by default. nthreads = 1; #ifdef _OPENMP if (lmp->kokkos) { nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa; } else if (getenv("OMP_NUM_THREADS") == nullptr) { nthreads = 1; if (me == 0) error->message(FLERR,"OMP_NUM_THREADS environment is not set. " "Defaulting to 1 thread."); } else { nthreads = omp_get_max_threads(); } // enforce consistent number of threads across all MPI tasks MPI_Bcast(&nthreads,1,MPI_INT,0,world); if (!lmp->kokkos) omp_set_num_threads(nthreads); if (me == 0) utils::logmesg(lmp," using {} OpenMP thread(s) per MPI task\n",nthreads); #endif } /* ---------------------------------------------------------------------- */ Comm::~Comm() { memory->destroy(grid2proc); memory->destroy(xsplit); memory->destroy(ysplit); memory->destroy(zsplit); memory->destroy(cutusermulti); memory->destroy(cutusermultiold); delete [] customfile; delete [] outfile; } /* ---------------------------------------------------------------------- deep copy of arrays from old Comm class to new one all public/protected vectors/arrays in parent Comm class must be copied called from alternate constructor of child classes when new comm style is created from Input ------------------------------------------------------------------------- */ void Comm::copy_arrays(Comm *oldcomm) { if (oldcomm->grid2proc) { memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2], "comm:grid2proc"); memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0], (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int)); memory->create(xsplit,procgrid[0]+1,"comm:xsplit"); memory->create(ysplit,procgrid[1]+1,"comm:ysplit"); memory->create(zsplit,procgrid[2]+1,"comm:zsplit"); memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double)); memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double)); memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double)); } ncollections = oldcomm->ncollections; ncollections_cutoff = oldcomm->ncollections_cutoff; if (oldcomm->cutusermulti) { memory->create(cutusermulti,ncollections_cutoff,"comm:cutusermulti"); memcpy(cutusermulti,oldcomm->cutusermulti,ncollections_cutoff); } if (oldcomm->cutusermultiold) { memory->create(cutusermultiold,atom->ntypes+1,"comm:cutusermultiold"); memcpy(cutusermultiold,oldcomm->cutusermultiold,atom->ntypes+1); } if (customfile) customfile = utils::strdup(oldcomm->customfile); if (outfile) outfile = utils::strdup(oldcomm->outfile); } /* ---------------------------------------------------------------------- common to all Comm styles ------------------------------------------------------------------------- */ void Comm::init() { triclinic = domain->triclinic; map_style = atom->map_style; // check warn if any proc's subbox is smaller than neigh skin // since may lead to lost atoms in exchange() // really should check every exchange() in case box size is shrinking // but seems overkill to do that (fix balance does perform this check) domain->subbox_too_small_check(neighbor->skin); // comm_only = 1 if only x,f are exchanged in forward/reverse comm // comm_x_only = 0 if ghost_velocity since velocities are added comm_x_only = atom->avec->comm_x_only; comm_f_only = atom->avec->comm_f_only; if (ghost_velocity) comm_x_only = 0; // set per-atom sizes for forward/reverse/border comm // augment by velocity and fix quantities if needed size_forward = atom->avec->size_forward; size_reverse = atom->avec->size_reverse; size_border = atom->avec->size_border; if (ghost_velocity) size_forward += atom->avec->size_velocity; if (ghost_velocity) size_border += atom->avec->size_velocity; const auto &fix_list = modify->get_fix_list(); for (const auto &fix : fix_list) size_border += fix->comm_border; // per-atom limits for communication // maxexchange = max # of datums in exchange comm, set in exchange() // maxforward = # of datums in largest forward comm // maxreverse = # of datums in largest reverse comm // query pair,fix,compute,dump for their requirements // pair style can force reverse comm even if newton off maxforward = MAX(size_forward,size_border); maxreverse = size_reverse; if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward); if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse); if (force->bond) maxforward = MAX(maxforward,force->bond->comm_forward); if (force->bond) maxreverse = MAX(maxreverse,force->bond->comm_reverse); for (const auto &fix : fix_list) { maxforward = MAX(maxforward, fix->comm_forward); maxreverse = MAX(maxreverse, fix->comm_reverse); } for (const auto &compute : modify->get_compute_list()) { maxforward = MAX(maxforward,compute->comm_forward); maxreverse = MAX(maxreverse,compute->comm_reverse); } for (const auto &dump: output->get_dump_list()) { maxforward = MAX(maxforward,dump->comm_forward); maxreverse = MAX(maxreverse,dump->comm_reverse); } if (force->newton == 0) maxreverse = 0; if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off); if (force->bond) maxreverse = MAX(maxreverse,force->bond->comm_reverse_off); // maxexchange_atom = size of an exchanged atom, set by AtomVec // only needs to be set if size > BUFEXTRA // maxexchange_fix_dynamic = 1 if any fix sets its maxexchange dynamically maxexchange_atom = atom->avec->maxexchange; maxexchange_fix_dynamic = 0; for (const auto &fix : fix_list) if (fix->maxexchange_dynamic) maxexchange_fix_dynamic = 1; if ((mode == Comm::MULTI) && (neighbor->style != Neighbor::MULTI)) error->all(FLERR,"Cannot use comm mode multi without multi-style neighbor lists"); if (multi_reduce) { if (force->newton == 0) error->all(FLERR,"Cannot use multi/reduce communication with Newton off"); if (neighbor->any_full()) error->all(FLERR,"Cannot use multi/reduce communication with a full neighbor list"); if (mode != Comm::MULTI) error->all(FLERR,"Cannot use multi/reduce communication without mode multi"); } } /* ---------------------------------------------------------------------- set maxexchange based on AtomVec and fixes ------------------------------------------------------------------------- */ void Comm::init_exchange() { maxexchange_fix = 0; for (const auto &fix : modify->get_fix_list()) maxexchange_fix += fix->maxexchange; maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; } /* ---------------------------------------------------------------------- modify communication params invoked from input script by comm_modify command ------------------------------------------------------------------------- */ void Comm::modify_params(int narg, char **arg) { if (narg < 1) utils::missing_cmd_args(FLERR, "comm_modify", error); int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"mode") == 0) { if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "comm_modify mode", error); if (strcmp(arg[iarg+1],"single") == 0) { // need to reset cutghostuser when switching comm mode if (mode == Comm::MULTI) cutghostuser = 0.0; if (mode == Comm::MULTIOLD) cutghostuser = 0.0; memory->destroy(cutusermulti); memory->destroy(cutusermultiold); mode = Comm::SINGLE; } else if (strcmp(arg[iarg+1],"multi") == 0) { if (neighbor->style != Neighbor::MULTI) error->all(FLERR,"Cannot use comm mode 'multi' without 'multi' style neighbor lists"); // need to reset cutghostuser when switching comm mode if (mode == Comm::SINGLE) cutghostuser = 0.0; if (mode == Comm::MULTIOLD) cutghostuser = 0.0; memory->destroy(cutusermultiold); mode = Comm::MULTI; } else if (strcmp(arg[iarg+1],"multi/old") == 0) { if (neighbor->style == Neighbor::MULTI) error->all(FLERR,"Cannot use comm mode 'multi/old' with 'multi' style neighbor lists"); // need to reset cutghostuser when switching comm mode if (mode == Comm::SINGLE) cutghostuser = 0.0; if (mode == Comm::MULTI) cutghostuser = 0.0; memory->destroy(cutusermulti); mode = Comm::MULTIOLD; } else error->all(FLERR,"Unknown comm_modify mode argument: {}", arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"group") == 0) { if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "comm_modify group", error); bordergroup = group->find(arg[iarg+1]); if (bordergroup < 0) error->all(FLERR, "Invalid comm_modify keyword: group {} not found", arg[iarg+1]); if (bordergroup && ((atom->firstgroupname == nullptr) || strcmp(arg[iarg+1],atom->firstgroupname) != 0)) error->all(FLERR, "Comm_modify group != atom_modify first group: {}", atom->firstgroupname); iarg += 2; } else if (strcmp(arg[iarg],"cutoff") == 0) { if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "comm_modify cutoff", error); if (mode == Comm::MULTI) error->all(FLERR, "Use cutoff/multi keyword to set cutoff in multi mode"); if (mode == Comm::MULTIOLD) error->all(FLERR, "Use cutoff/multi/old keyword to set cutoff in multi mode"); cutghostuser = utils::numeric(FLERR,arg[iarg+1],false,lmp); if (cutghostuser < 0.0) error->all(FLERR,"Invalid cutoff {} in comm_modify command", arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"cutoff/multi") == 0) { int i,nlo,nhi; double cut; if (mode == Comm::SINGLE) error->all(FLERR,"Use cutoff keyword to set cutoff in single mode"); if (mode == Comm::MULTIOLD) error->all(FLERR,"Use cutoff/multi/old keyword to set cutoff in multi/old mode"); if (domain->box_exist == 0) error->all(FLERR, "Cannot set cutoff/multi before simulation box is defined"); // Check if # of collections has changed, if so erase any previously defined cutoffs // Neighbor will reset ncollections if collections are redefined if (! cutusermulti || ncollections_cutoff != neighbor->ncollections) { ncollections_cutoff = neighbor->ncollections; memory->destroy(cutusermulti); memory->create(cutusermulti,ncollections_cutoff,"comm:cutusermulti"); for (i=0; i < ncollections_cutoff; ++i) cutusermulti[i] = -1.0; } utils::bounds(FLERR,arg[iarg+1],1,ncollections_cutoff,nlo,nhi,error); cut = utils::numeric(FLERR,arg[iarg+2],false,lmp); cutghostuser = MAX(cutghostuser,cut); if (cut < 0.0) error->all(FLERR,"Invalid cutoff {} in comm_modify command", arg[iarg+2]); // collections use 1-based indexing externally and 0-based indexing internally for (i=nlo; i<=nhi; ++i) cutusermulti[i-1] = cut; iarg += 3; } else if (strcmp(arg[iarg],"cutoff/multi/old") == 0) { int i,nlo,nhi; double cut; if (mode == Comm::SINGLE) error->all(FLERR,"Use cutoff keyword to set cutoff in single mode"); if (mode == Comm::MULTI) error->all(FLERR,"Use cutoff/multi keyword to set cutoff in multi mode"); if (domain->box_exist == 0) error->all(FLERR, "Cannot set cutoff/multi before simulation box is defined"); const int ntypes = atom->ntypes; if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "comm_modify cutoff/multi/old", error); if (cutusermultiold == nullptr) { memory->create(cutusermultiold,ntypes+1,"comm:cutusermultiold"); for (i=0; i < ntypes+1; ++i) cutusermultiold[i] = -1.0; } utils::bounds(FLERR,arg[iarg+1],1,ntypes,nlo,nhi,error); cut = utils::numeric(FLERR,arg[iarg+2],false,lmp); cutghostuser = MAX(cutghostuser,cut); if (cut < 0.0) error->all(FLERR,"Invalid cutoff {} in comm_modify command", arg[iarg+2]); for (i=nlo; i<=nhi; ++i) cutusermultiold[i] = cut; iarg += 3; } else if (strcmp(arg[iarg],"reduce/multi") == 0) { if (mode == Comm::SINGLE) error->all(FLERR,"Use reduce/multi in mode multi only"); multi_reduce = 1; iarg += 1; } else if (strcmp(arg[iarg],"vel") == 0) { if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "comm_modify vel", error); ghost_velocity = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else error->all(FLERR,"Unknown comm_modify keyword: {}", arg[iarg]); } } /* ---------------------------------------------------------------------- set dimensions for 3d grid of processors, and associated flags invoked from input script by processors command ------------------------------------------------------------------------- */ void Comm::set_processors(int narg, char **arg) { if (narg < 3) error->all(FLERR,"Illegal processors command"); if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0; else user_procgrid[0] = utils::inumeric(FLERR,arg[0],false,lmp); if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0; else user_procgrid[1] = utils::inumeric(FLERR,arg[1],false,lmp); if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0; else user_procgrid[2] = utils::inumeric(FLERR,arg[2],false,lmp); if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0) error->all(FLERR,"Illegal processors command"); int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2]; if (p && p != nprocs) error->all(FLERR,"Specified processors != physical processors"); int iarg = 3; while (iarg < narg) { if (strcmp(arg[iarg],"grid") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); if (strcmp(arg[iarg+1],"onelevel") == 0) { gridflag = ONELEVEL; } else if (strcmp(arg[iarg+1],"twolevel") == 0) { if (iarg+6 > narg) error->all(FLERR,"Illegal processors command"); gridflag = TWOLEVEL; ncores = utils::inumeric(FLERR,arg[iarg+2],false,lmp); if (strcmp(arg[iarg+3],"*") == 0) user_coregrid[0] = 0; else user_coregrid[0] = utils::inumeric(FLERR,arg[iarg+3],false,lmp); if (strcmp(arg[iarg+4],"*") == 0) user_coregrid[1] = 0; else user_coregrid[1] = utils::inumeric(FLERR,arg[iarg+4],false,lmp); if (strcmp(arg[iarg+5],"*") == 0) user_coregrid[2] = 0; else user_coregrid[2] = utils::inumeric(FLERR,arg[iarg+5],false,lmp); if (ncores <= 0 || user_coregrid[0] < 0 || user_coregrid[1] < 0 || user_coregrid[2] < 0) error->all(FLERR,"Illegal processors command"); iarg += 4; } else if (strcmp(arg[iarg+1],"numa") == 0) { gridflag = NUMA; } else if (strcmp(arg[iarg+1],"custom") == 0) { if (iarg+3 > narg) error->all(FLERR,"Illegal processors command"); gridflag = CUSTOM; delete [] customfile; customfile = utils::strdup(arg[iarg+2]); iarg += 1; } else error->all(FLERR,"Illegal processors command"); iarg += 2; } else if (strcmp(arg[iarg],"map") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART; else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER; else if (strcmp(arg[iarg+1],"xyz") == 0 || strcmp(arg[iarg+1],"xzy") == 0 || strcmp(arg[iarg+1],"yxz") == 0 || strcmp(arg[iarg+1],"yzx") == 0 || strcmp(arg[iarg+1],"zxy") == 0 || strcmp(arg[iarg+1],"zyx") == 0) { mapflag = XYZ; strncpy(xyz,arg[iarg+1],3); } else error->all(FLERR,"Illegal processors command"); iarg += 2; } else if (strcmp(arg[iarg],"part") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal processors command"); if (universe->nworlds == 1) error->all(FLERR, "Cannot use processors part command " "without using partitions"); int isend = utils::inumeric(FLERR,arg[iarg+1],false,lmp); int irecv = utils::inumeric(FLERR,arg[iarg+2],false,lmp); if (isend < 1 || isend > universe->nworlds || irecv < 1 || irecv > universe->nworlds || isend == irecv) error->all(FLERR,"Invalid partitions in processors part command"); if (isend-1 == universe->iworld) { if (send_to_partition >= 0) error->all(FLERR, "Sending partition in processors part command " "is already a sender"); send_to_partition = irecv-1; } if (irecv-1 == universe->iworld) { if (recv_from_partition >= 0) error->all(FLERR, "Receiving partition in processors part command " "is already a receiver"); recv_from_partition = isend-1; } // only receiver has otherflag dependency if (strcmp(arg[iarg+3],"multiple") == 0) { if (universe->iworld == irecv-1) { otherflag = 1; other_style = Comm::MULTIPLE; } } else error->all(FLERR,"Illegal processors command"); iarg += 4; } else if (strcmp(arg[iarg],"file") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); delete [] outfile; outfile = utils::strdup(arg[iarg+1]); iarg += 2; } else error->all(FLERR,"Illegal processors command"); } // error checks if (gridflag == NUMA && mapflag != CART) error->all(FLERR,"Processors grid numa and map style are incompatible"); if (otherflag && (gridflag == NUMA || gridflag == CUSTOM)) error->all(FLERR, "Processors part option and grid style are incompatible"); } /* ---------------------------------------------------------------------- create a 3d grid of procs based on Nprocs and box size & shape map processors to grid, setup xyz split for a uniform grid ------------------------------------------------------------------------- */ void Comm::set_proc_grid(int outflag) { // recv 3d proc grid of another partition if my 3d grid depends on it if (recv_from_partition >= 0) { if (me == 0) { MPI_Recv(other_procgrid,3,MPI_INT, universe->root_proc[recv_from_partition],0, universe->uworld,MPI_STATUS_IGNORE); MPI_Recv(other_coregrid,3,MPI_INT, universe->root_proc[recv_from_partition],0, universe->uworld,MPI_STATUS_IGNORE); } MPI_Bcast(other_procgrid,3,MPI_INT,0,world); MPI_Bcast(other_coregrid,3,MPI_INT,0,world); } // create ProcMap class to create 3d grid and map procs to it auto pmap = new ProcMap(lmp); // create 3d grid of processors // produces procgrid and coregrid (if relevant) if (gridflag == ONELEVEL) { pmap->onelevel_grid(nprocs,user_procgrid,procgrid, otherflag,other_style,other_procgrid,other_coregrid); } else if (gridflag == TWOLEVEL) { pmap->twolevel_grid(nprocs,user_procgrid,procgrid, ncores,user_coregrid,coregrid, otherflag,other_style,other_procgrid,other_coregrid); } else if (gridflag == NUMA) { pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid); } else if (gridflag == CUSTOM) { pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid); } // error check on procgrid // should not be necessary due to ProcMap if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs) error->all(FLERR,"Bad grid of processors"); if (domain->dimension == 2 && procgrid[2] != 1) error->all(FLERR,"Processor count in z must be 1 for 2d simulation"); // grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid if (grid2proc) memory->destroy(grid2proc); memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2], "comm:grid2proc"); // map processor IDs to 3d processor grid // produces myloc, procneigh, grid2proc if (gridflag == ONELEVEL) { if (mapflag == CART) pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc); else if (mapflag == CARTREORDER) pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc); else if (mapflag == XYZ) pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc); } else if (gridflag == TWOLEVEL) { if (mapflag == CART) pmap->cart_map(0,procgrid,ncores,coregrid,myloc,procneigh,grid2proc); else if (mapflag == CARTREORDER) pmap->cart_map(1,procgrid,ncores,coregrid,myloc,procneigh,grid2proc); else if (mapflag == XYZ) pmap->xyz_map(xyz,procgrid,ncores,coregrid,myloc,procneigh,grid2proc); } else if (gridflag == NUMA) { pmap->numa_map(0,coregrid,myloc,procneigh,grid2proc); } else if (gridflag == CUSTOM) { pmap->custom_map(procgrid,myloc,procneigh,grid2proc); } // print 3d grid info to screen and logfile if (outflag && me == 0) { auto mesg = fmt::format(" {} by {} by {} MPI processor grid\n", procgrid[0],procgrid[1],procgrid[2]); if (gridflag == NUMA || gridflag == TWOLEVEL) mesg += fmt::format(" {} by {} by {} core grid within node\n", coregrid[0],coregrid[1],coregrid[2]); utils::logmesg(lmp,mesg); } // print 3d grid details to outfile if (outfile) pmap->output(outfile,procgrid,grid2proc); // free ProcMap class delete pmap; // set xsplit,ysplit,zsplit for uniform spacings memory->destroy(xsplit); memory->destroy(ysplit); memory->destroy(zsplit); memory->create(xsplit,procgrid[0]+1,"comm:xsplit"); memory->create(ysplit,procgrid[1]+1,"comm:ysplit"); memory->create(zsplit,procgrid[2]+1,"comm:zsplit"); for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0]; for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1]; for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2]; xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0; // set lamda box params after procs are assigned // only set once unless load-balancing occurs if (domain->triclinic) domain->set_lamda_box(); // send my 3d proc grid to another partition if requested if (send_to_partition >= 0) { if (me == 0) { MPI_Send(procgrid,3,MPI_INT, universe->root_proc[send_to_partition],0, universe->uworld); MPI_Send(coregrid,3,MPI_INT, universe->root_proc[send_to_partition],0, universe->uworld); } } } /* ---------------------------------------------------------------------- determine suitable communication cutoff. this uses three inputs: 1) maximum neighborlist cutoff, 2) an estimate based on bond lengths and bonded interaction styles present, and 3) a user supplied communication cutoff. the neighbor list cutoff (1) is *always* used, since it is a requirement for neighborlists working correctly. the bond length based cutoff is *only* used, if no pair style is defined and no user cutoff is provided. otherwise, a warning is printed. if the bond length based estimate is larger than what is used. print a warning, if a user specified communication cutoff is overridden. ------------------------------------------------------------------------- */ double Comm::get_comm_cutoff() { double maxcommcutoff, maxbondcutoff = 0.0; if (force->bond) { int n = atom->nbondtypes; for (int i = 1; i <= n; ++i) maxbondcutoff = MAX(maxbondcutoff,force->bond->equilibrium_distance(i)); // apply bond length based heuristics. if (force->newton_bond) { if (force->dihedral || force->improper) { maxbondcutoff *= 2.25; } else { maxbondcutoff *=1.5; } } else { if (force->dihedral || force->improper) { maxbondcutoff *= 3.125; } else if (force->angle) { maxbondcutoff *= 2.25; } else { maxbondcutoff *=1.5; } } maxbondcutoff += neighbor->skin; } // always take the larger of max neighbor list and user specified cutoff maxcommcutoff = MAX(cutghostuser,neighbor->cutneighmax); // use cutoff estimate from bond length only if no user specified // cutoff was given and no pair style present. Otherwise print a // warning, if the estimated bond based cutoff is larger than what // is currently used. if (!force->pair && (cutghostuser == 0.0)) { maxcommcutoff = MAX(maxcommcutoff,maxbondcutoff); } else { if ((me == 0) && (maxbondcutoff > maxcommcutoff)) error->warning(FLERR,"Communication cutoff {} is shorter than a bond " "length based estimate of {}. This may lead to errors.", maxcommcutoff,maxbondcutoff); } // print warning if neighborlist cutoff overrides user cutoff if ((me == 0) && (update->setupflag == 1)) { if ((cutghostuser > 0.0) && (maxcommcutoff > cutghostuser)) error->warning(FLERR,"Communication cutoff adjusted to {}",maxcommcutoff); } // check maximum interval size for neighbor multi if (neighbor->interval_collection_flag) { for (int i = 0; i < neighbor->ncollections; i++){ maxcommcutoff = MAX(maxcommcutoff, neighbor->collection2cut[i]); } } return maxcommcutoff; } /* ---------------------------------------------------------------------- determine which proc owns atom with coord x[3] based on current decomp x will be in box (orthogonal) or lamda coords (triclinic) if layout = UNIFORM, calculate owning proc directly if layout = NONUNIFORM, iteratively find owning proc via binary search if layout = TILED, CommTiled has its own method return owning proc ID via grid2proc return igx,igy,igz = logical grid loc of owing proc within 3d grid of procs ------------------------------------------------------------------------- */ int Comm::coord2proc(double *x, int &igx, int &igy, int &igz) { double *prd = domain->prd; double *boxlo = domain->boxlo; // initialize triclinic b/c coord2proc can be called before Comm::init() // via Irregular::migrate_atoms() triclinic = domain->triclinic; if (layout == Comm::LAYOUT_UNIFORM) { if (triclinic == 0) { igx = static_cast (procgrid[0] * (x[0]-boxlo[0]) / prd[0]); igy = static_cast (procgrid[1] * (x[1]-boxlo[1]) / prd[1]); igz = static_cast (procgrid[2] * (x[2]-boxlo[2]) / prd[2]); } else { igx = static_cast (procgrid[0] * x[0]); igy = static_cast (procgrid[1] * x[1]); igz = static_cast (procgrid[2] * x[2]); } } else if (layout == Comm::LAYOUT_NONUNIFORM) { if (triclinic == 0) { igx = utils::binary_search((x[0]-boxlo[0])/prd[0],procgrid[0],xsplit); igy = utils::binary_search((x[1]-boxlo[1])/prd[1],procgrid[1],ysplit); igz = utils::binary_search((x[2]-boxlo[2])/prd[2],procgrid[2],zsplit); } else { igx = utils::binary_search(x[0],procgrid[0],xsplit); igy = utils::binary_search(x[1],procgrid[1],ysplit); igz = utils::binary_search(x[2],procgrid[2],zsplit); } } if (igx < 0) igx = 0; if (igx >= procgrid[0]) igx = procgrid[0] - 1; if (igy < 0) igy = 0; if (igy >= procgrid[1]) igy = procgrid[1] - 1; if (igz < 0) igz = 0; if (igz >= procgrid[2]) igz = procgrid[2] - 1; return grid2proc[igx][igy][igz]; } /* ---------------------------------------------------------------------- partition a global regular grid into one brick-shaped sub-grid per proc if grid point is inside my sub-domain I own it, this includes sub-domain lo boundary but excludes hi boundary nx,ny,nz = extent of global grid indices into the global grid range from 0 to N-1 in each dim zfactor = 0.0 if the grid exactly covers the simulation box zfactor > 1.0 if the grid extends beyond the +z boundary by this factor used by 2d slab-mode PPPM this effectively maps proc sub-grids to a smaller subset of the grid nxyz lo/hi = inclusive lo/hi bounds of global grid sub-brick I own if proc owns no grid cells in a dim, then nlo > nhi special case: 2 procs share boundary which a grid point is exactly on 2 equality if tests insure a consistent decision as to which proc owns it ------------------------------------------------------------------------- */ void Comm::partition_grid(int nx, int ny, int nz, double zfactor, int &nxlo, int &nxhi, int &nylo, int &nyhi, int &nzlo, int &nzhi) { double xfraclo,xfrachi,yfraclo,yfrachi,zfraclo,zfrachi; if (layout != LAYOUT_TILED) { xfraclo = xsplit[myloc[0]]; xfrachi = xsplit[myloc[0]+1]; yfraclo = ysplit[myloc[1]]; yfrachi = ysplit[myloc[1]+1]; zfraclo = zsplit[myloc[2]]; zfrachi = zsplit[myloc[2]+1]; } else { xfraclo = mysplit[0][0]; xfrachi = mysplit[0][1]; yfraclo = mysplit[1][0]; yfrachi = mysplit[1][1]; zfraclo = mysplit[2][0]; zfrachi = mysplit[2][1]; } nxlo = static_cast (xfraclo * nx); if (1.0*nxlo != xfraclo*nx) nxlo++; nxhi = static_cast (xfrachi * nx); if (1.0*nxhi == xfrachi*nx) nxhi--; nylo = static_cast (yfraclo * ny); if (1.0*nylo != yfraclo*ny) nylo++; nyhi = static_cast (yfrachi * ny); if (1.0*nyhi == yfrachi*ny) nyhi--; if (zfactor == 0.0) { nzlo = static_cast (zfraclo * nz); if (1.0*nzlo != zfraclo*nz) nzlo++; nzhi = static_cast (zfrachi * nz); if (1.0*nzhi == zfrachi*nz) nzhi--; } else { nzlo = static_cast (zfraclo * nz/zfactor); if (1.0*nzlo != zfraclo*nz) nzlo++; nzhi = static_cast (zfrachi * nz/zfactor); if (1.0*nzhi == zfrachi*nz) nzhi--; } // OLD code // could sometimes map grid points slightly outside a proc to the proc /* if (layout != LAYOUT_TILED) { nxlo = static_cast (xsplit[myloc[0]] * nx); nxhi = static_cast (xsplit[myloc[0]+1] * nx) - 1; nylo = static_cast (ysplit[myloc[1]] * ny); nyhi = static_cast (ysplit[myloc[1]+1] * ny) - 1; if (zfactor == 0.0) { nzlo = static_cast (zsplit[myloc[2]] * nz); nzhi = static_cast (zsplit[myloc[2]+1] * nz) - 1; } else { nzlo = static_cast (zsplit[myloc[2]] * nz/zfactor); nzhi = static_cast (zsplit[myloc[2]+1] * nz/zfactor) - 1; } } else { nxlo = static_cast (mysplit[0][0] * nx); nxhi = static_cast (mysplit[0][1] * nx) - 1; nylo = static_cast (mysplit[1][0] * ny); nyhi = static_cast (mysplit[1][1] * ny) - 1; if (zfactor == 0.0) { nzlo = static_cast (mysplit[2][0] * nz); nzhi = static_cast (mysplit[2][1] * nz) - 1; } else { nzlo = static_cast (mysplit[2][0] * nz/zfactor); nzhi = static_cast (mysplit[2][1] * nz/zfactor) - 1; } } */ } /* ---------------------------------------------------------------------- communicate inbuf around full ring of processors with messtag nbytes = size of inbuf = n datums * nper bytes callback() is invoked to allow caller to process/update each proc's inbuf if self=1 (default), then callback() is invoked on final iteration using original inbuf, which may have been updated for non-nullptr outbuf, final updated inbuf is copied to it ok to specify outbuf = inbuf the ptr argument is a pointer to the instance of calling class ------------------------------------------------------------------------- */ void Comm::ring(int n, int nper, void *inbuf, int messtag, void (*callback)(int, char *, void *), void *outbuf, void *ptr, int self) { MPI_Request request; MPI_Status status; int nbytes = n*nper; int maxbytes; MPI_Allreduce(&nbytes,&maxbytes,1,MPI_INT,MPI_MAX,world); // no need to communicate without data if (maxbytes == 0) return; // sanity check if ((nbytes > 0) && inbuf == nullptr) error->one(FLERR,"Cannot put data on ring from NULL pointer"); char *buf,*bufcopy; memory->create(buf,maxbytes,"comm:buf"); memory->create(bufcopy,maxbytes,"comm:bufcopy"); if (nbytes && inbuf) memcpy(buf,inbuf,nbytes); int next = me + 1; int prev = me - 1; if (next == nprocs) next = 0; if (prev < 0) prev = nprocs - 1; for (int loop = 0; loop < nprocs; loop++) { if (me != next) { MPI_Irecv(bufcopy,maxbytes,MPI_CHAR,prev,messtag,world,&request); MPI_Send(buf,nbytes,MPI_CHAR,next,messtag,world); MPI_Wait(&request,&status); MPI_Get_count(&status,MPI_CHAR,&nbytes); if (nbytes) memcpy(buf,bufcopy,nbytes); } if (self || loop < nprocs-1) callback(nbytes/nper,buf,ptr); } if (nbytes && outbuf) memcpy(outbuf,buf,nbytes); memory->destroy(buf); memory->destroy(bufcopy); } /* ---------------------------------------------------------------------- rendezvous communication operation three stages: first comm sends inbuf from caller decomp to rvous decomp callback operates on data in rendezvous decomp second comm sends outbuf from rvous decomp back to caller decomp inputs: which = perform (0) irregular or (1) MPI_All2allv communication n = # of datums in inbuf inbuf = vector of input datums insize = byte size of each input datum inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc, callback = caller function to invoke in rendezvous decomposition takes input datums, returns output datums outorder = same as inorder, but for datums returned by callback() ptr = pointer to caller class, passed to callback() outputs: nout = # of output datums (function return) outbuf = vector of output datums outsize = byte size of each output datum callback inputs: nrvous = # of rvous decomp datums in inbuf_rvous inbuf_rvous = vector of rvous decomp input datums ptr = pointer to caller class callback outputs: nrvous_out = # of rvous decomp output datums (function return) flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous, 2 for second comm with new outbuf_rvous procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc allocated outbuf_rvous = vector of rvous decomp output datums NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR to avoid checked-for overflow in MPI_Alltoallv? ------------------------------------------------------------------------- */ int Comm:: rendezvous(int which, int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), int outorder, char *&outbuf, int outsize, void *ptr, int statflag) { if (which == 0) return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback, outorder,outbuf,outsize,ptr,statflag); else return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback, outorder,outbuf,outsize,ptr,statflag); } /* ---------------------------------------------------------------------- */ int Comm:: rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), int outorder, char *&outbuf, int outsize, void *ptr, int statflag) { // irregular comm of inbuf from caller decomp to rendezvous decomp auto irregular = new Irregular(lmp); int nrvous; if (inorder) nrvous = irregular->create_data_grouped(n,procs); else nrvous = irregular->create_data(n,procs); // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer auto inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize+1, "rendezvous:inbuf"); irregular->exchange_data(inbuf,insize,inbuf_rvous); bigint irregular1_bytes = irregular->memory_usage(); irregular->destroy_data(); delete irregular; // peform rendezvous computation via callback() // callback() allocates/populates proclist_rvous and outbuf_rvous int flag; int *procs_rvous; char *outbuf_rvous; int nrvous_out = callback(nrvous,inbuf_rvous,flag, procs_rvous,outbuf_rvous,ptr); if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous if (flag == 0) { if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, (bigint) nrvous_out*sizeof(int) + irregular1_bytes); return 0; // all nout_rvous are 0, no 2nd comm stage } // irregular comm of outbuf from rendezvous decomp back to caller decomp // caller will free outbuf irregular = new Irregular(lmp); int nout; if (outorder) nout = irregular->create_data_grouped(nrvous_out,procs_rvous); else nout = irregular->create_data(nrvous_out,procs_rvous); // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer outbuf = (char *) memory->smalloc((bigint) nout*outsize+1, "rendezvous:outbuf"); irregular->exchange_data(outbuf_rvous,outsize,outbuf); bigint irregular2_bytes = irregular->memory_usage(); irregular->destroy_data(); delete irregular; memory->destroy(procs_rvous); memory->sfree(outbuf_rvous); // return number of output datums // last arg to stats() = memory for procs_rvous + irregular comm if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, (bigint) nrvous_out*sizeof(int) + MAX(irregular1_bytes,irregular2_bytes)); return nout; } /* ---------------------------------------------------------------------- */ int Comm:: rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs, int (*callback)(int, char *, int &, int *&, char *&, void *), int outorder, char *&outbuf, int outsize, void *ptr, int statflag) { int iproc; bigint all2all1_bytes,all2all2_bytes; int *sendcount,*sdispls,*recvcount,*rdispls; int *procs_a2a; bigint *offsets; char *inbuf_a2a,*outbuf_a2a; // create procs and inbuf for All2all if necessary if (!inorder) { memory->create(procs_a2a,nprocs,"rendezvous:procs"); // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer inbuf_a2a = (char *) memory->smalloc((bigint) n*insize+1, "rendezvous:inbuf"); memset(inbuf_a2a,0,(bigint)n*insize*sizeof(char)); memory->create(offsets,nprocs,"rendezvous:offsets"); for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; for (int i = 0; i < n; i++) procs_a2a[procs[i]]++; offsets[0] = 0; for (int i = 1; i < nprocs; i++) offsets[i] = offsets[i-1] + (bigint)insize*procs_a2a[i-1]; bigint offset = 0; for (int i = 0; i < n; i++) { iproc = procs[i]; memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize); offsets[iproc] += insize; offset += insize; } all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + (bigint)n*insize; } else { procs_a2a = procs; inbuf_a2a = inbuf; all2all1_bytes = 0; } // create args for MPI_Alltoallv() on input data memory->create(sendcount,nprocs,"rendezvous:sendcount"); memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); memory->create(recvcount,nprocs,"rendezvous:recvcount"); MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); memory->create(sdispls,nprocs,"rendezvous:sdispls"); memory->create(rdispls,nprocs,"rendezvous:rdispls"); sdispls[0] = rdispls[0] = 0; for (int i = 1; i < nprocs; i++) { sdispls[i] = sdispls[i-1] + sendcount[i-1]; rdispls[i] = rdispls[i-1] + recvcount[i-1]; } int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1]; // test for overflow of input data due to imbalance or insize // means that individual sdispls or rdispls values overflow int overflow = 0; if ((bigint) n*insize > MAXSMALLINT) overflow = 1; if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1; int overflowall; MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a"); for (int i = 0; i < nprocs; i++) { sendcount[i] *= insize; sdispls[i] *= insize; recvcount[i] *= insize; rdispls[i] *= insize; } // all2all comm of inbuf from caller decomp to rendezvous decomp // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer auto inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize+1, "rendezvous:inbuf"); memset(inbuf_rvous,0,(bigint) nrvous*insize*sizeof(char)); MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR, inbuf_rvous,recvcount,rdispls,MPI_CHAR,world); if (!inorder) { memory->destroy(procs_a2a); memory->sfree(inbuf_a2a); memory->destroy(offsets); } // peform rendezvous computation via callback() // callback() allocates/populates proclist_rvous and outbuf_rvous int flag; int *procs_rvous; char *outbuf_rvous; int nrvous_out = callback(nrvous,inbuf_rvous,flag, procs_rvous,outbuf_rvous,ptr); if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous if (flag == 0) { memory->destroy(sendcount); memory->destroy(recvcount); memory->destroy(sdispls); memory->destroy(rdispls); if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize, (bigint) nrvous_out*sizeof(int) + 4*nprocs*sizeof(int) + all2all1_bytes); return 0; // all nout_rvous are 0, no 2nd irregular } // create procs and outbuf for All2all if necessary if (!outorder) { memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs"); // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize+1, "rendezvous:outbuf"); memory->create(offsets,nprocs,"rendezvous:offsets"); for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0; for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++; offsets[0] = 0; for (int i = 1; i < nprocs; i++) offsets[i] = offsets[i-1] + (bigint)outsize*procs_a2a[i-1]; bigint offset = 0; for (int i = 0; i < nrvous_out; i++) { iproc = procs_rvous[i]; memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize); offsets[iproc] += outsize; offset += outsize; } all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + (bigint)nrvous_out*outsize; } else { procs_a2a = procs_rvous; outbuf_a2a = outbuf_rvous; all2all2_bytes = 0; } // comm outbuf from rendezvous decomposition back to caller memcpy(sendcount,procs_a2a,nprocs*sizeof(int)); MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world); sdispls[0] = rdispls[0] = 0; for (int i = 1; i < nprocs; i++) { sdispls[i] = sdispls[i-1] + sendcount[i-1]; rdispls[i] = rdispls[i-1] + recvcount[i-1]; } int nout = rdispls[nprocs-1] + recvcount[nprocs-1]; // test for overflow of outbuf due to imbalance or outsize // means that individual sdispls or rdispls values overflow overflow = 0; if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1; if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1; MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world); if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a"); for (int i = 0; i < nprocs; i++) { sendcount[i] *= outsize; sdispls[i] *= outsize; recvcount[i] *= outsize; rdispls[i] *= outsize; } // all2all comm of outbuf from rendezvous decomp back to caller decomp // caller will free outbuf // add 1 item to the allocated buffer size, so the returned pointer is not a null pointer outbuf = (char *) memory->smalloc((bigint) nout*outsize+1,"rendezvous:outbuf"); MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR, outbuf,recvcount,rdispls,MPI_CHAR,world); memory->destroy(procs_rvous); memory->sfree(outbuf_rvous); if (!outorder) { memory->destroy(procs_a2a); memory->sfree(outbuf_a2a); memory->destroy(offsets); } // clean up memory->destroy(sendcount); memory->destroy(recvcount); memory->destroy(sdispls); memory->destroy(rdispls); // return number of output datums // last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize, (bigint) nrvous_out*sizeof(int) + 4*nprocs*sizeof(int) + MAX(all2all1_bytes,all2all2_bytes)); return nout; } /* ---------------------------------------------------------------------- print balance and memory info for rendezvous operation useful for debugging ------------------------------------------------------------------------- */ void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out, int insize, int outsize, bigint commsize) { bigint size_in_all,size_in_max,size_in_min; bigint size_out_all,size_out_max,size_out_min; bigint size_inrvous_all,size_inrvous_max,size_inrvous_min; bigint size_outrvous_all,size_outrvous_max,size_outrvous_min; bigint size_comm_all,size_comm_max,size_comm_min; bigint size = (bigint) n*insize; MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world); size = (bigint) nout*outsize; MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world); size = (bigint) nrvous*insize; MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); size = (bigint) nrvous_out*insize; MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world); size = commsize; MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world); MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world); int mbytes = 1024*1024; if (me == 0) { std::string mesg = "Rendezvous balance and memory info: (tot,ave,max,min) \n"; mesg += fmt::format(" input datum count: {} {} {} {}\n", size_in_all/insize,1.0*size_in_all/nprocs/insize, size_in_max/insize,size_in_min/insize); mesg += fmt::format(" input data (MB): {:.6} {:.6} {:.6} {:.6}\n", 1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes, 1.0*size_in_max/mbytes,1.0*size_in_min/mbytes); if (outsize) mesg += fmt::format(" output datum count: {} {} {} {}\n", size_out_all/outsize,1.0*size_out_all/nprocs/outsize, size_out_max/outsize,size_out_min/outsize); else mesg += fmt::format(" output datum count: {} {:.6} {} {}\n",0,0.0,0,0); mesg += fmt::format(" output data (MB): {:.6} {:.6} {:.6} {:.6}\n", 1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes, 1.0*size_out_max/mbytes,1.0*size_out_min/mbytes); mesg += fmt::format(" input rvous datum count: {} {} {} {}\n", size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize, size_inrvous_max/insize,size_inrvous_min/insize); mesg += fmt::format(" input rvous data (MB): {:.6} {:.6} {:.6} {:.6}\n", 1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes, 1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes); if (outsize) mesg += fmt::format(" output rvous datum count: {} {} {} {}\n", size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize, size_outrvous_max/outsize,size_outrvous_min/outsize); else mesg += fmt::format(" output rvous datum count: {} {:.6} {} {}\n",0,0.0,0,0); mesg += fmt::format(" output rvous data (MB): {:.6} {:.6} {:.6} {:.6}\n", 1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes, 1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes); mesg += fmt::format(" rvous comm (MB): {:.6} {:.6} {:.6} {:.6}\n", 1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes, 1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes); utils::logmesg(lmp,mesg); } }