mirror of
https://develop.openfoam.com/Development/openfoam.git
synced 2025-11-28 03:28:01 +00:00
456 lines
13 KiB
C
456 lines
13 KiB
C
/* * Copyright 1997, Regents of the University of Minnesota
|
|
*
|
|
* mdiffusion.c
|
|
*
|
|
* This file contains code that performs mc-diffusion
|
|
*
|
|
* Started 9/16/99
|
|
* George
|
|
*
|
|
* $Id: mdiffusion.c,v 1.2 2003/07/21 17:18:50 karypis Exp $
|
|
*/
|
|
|
|
#include <parmetislib.h>
|
|
|
|
#define PE -1
|
|
|
|
/*************************************************************************
|
|
* This function is the entry point of the initial partitioning algorithm.
|
|
* This algorithm assembles the graph to all the processors and preceed
|
|
* serially.
|
|
**************************************************************************/
|
|
int Moc_Diffusion(CtrlType *ctrl, GraphType *graph, idxtype *vtxdist,
|
|
idxtype *where, idxtype *home, WorkSpaceType *wspace, int npasses)
|
|
{
|
|
int h, i, j;
|
|
int nvtxs, nedges, ncon, pass, iter, domain, processor;
|
|
int nparts, mype, npes, nlinks, me, you, wsize;
|
|
int nvisited, nswaps = -1, tnswaps, done, alldone = -1;
|
|
idxtype *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap;
|
|
idxtype *pack, *unpack, *match, *proc2sub, *sub2proc;
|
|
idxtype *visited, *gvisited;
|
|
float *transfer, *npwgts, maxdiff, minflow, maxflow;
|
|
float lbavg, oldlbavg, ubavg, lbvec[MAXNCON];
|
|
float diff_flows[MAXNCON], sr_flows[MAXNCON];
|
|
float diff_lbavg, sr_lbavg, diff_cost, sr_cost;
|
|
idxtype *rbuffer, *sbuffer;
|
|
int *rcount, *rdispl;
|
|
float *solution, *load, *workspace;
|
|
EdgeType *degrees;
|
|
MatrixType matrix;
|
|
GraphType *egraph;
|
|
RInfoType *rinfo;
|
|
|
|
if (graph->ncon > 3)
|
|
return 0;
|
|
|
|
nvtxs = graph->nvtxs;
|
|
nedges = graph->nedges;
|
|
ncon = graph->ncon;
|
|
|
|
nparts = ctrl->nparts;
|
|
mype = ctrl->mype;
|
|
npes = ctrl->npes;
|
|
ubavg = savg(ncon, ctrl->ubvec);
|
|
|
|
/********************************************/
|
|
/* initialize variables and allocate memory */
|
|
/********************************************/
|
|
load = fmalloc(nparts*(2+ncon)+nedges*(1+ncon), "load");
|
|
solution = load + nparts;
|
|
npwgts = graph->gnpwgts = load + 2*nparts;
|
|
matrix.values = load + (2+ncon)*nparts;
|
|
transfer = matrix.transfer = load + (2+ncon)*nparts + nedges;
|
|
|
|
proc2sub = idxmalloc(amax(nparts, npes*2), "Mc_Diffusion: proc2sub");
|
|
sub2proc = idxmalloc(nparts*3+nedges+1, "Mc_Diffusion: match");
|
|
match = sub2proc + nparts;
|
|
rowptr = matrix.rowptr = sub2proc + 2*nparts;
|
|
colind = matrix.colind = sub2proc + 3*nparts + 1;
|
|
|
|
rcount = imalloc(2*npes+1, "Mc_Diffusion: rcount");
|
|
rdispl = rcount + npes;
|
|
|
|
pack = idxmalloc(nvtxs*8, "Mc_Diffusion: pack");
|
|
unpack = pack + nvtxs;
|
|
rbuffer = pack + 2*nvtxs;
|
|
sbuffer = pack + 3*nvtxs;
|
|
map = pack + 4*nvtxs;
|
|
rmap = pack + 5*nvtxs;
|
|
diff_where = pack + 6*nvtxs;
|
|
ehome = pack + 7*nvtxs;
|
|
|
|
wsize = amax(sizeof(float)*nparts*6, sizeof(idxtype)*(nvtxs+nparts*2+1));
|
|
workspace = (float *)GKmalloc(wsize, "Moc_Diffusion: workspace");
|
|
degrees = GKmalloc(nedges*sizeof(EdgeType), "Mc_Diffusion: degrees");
|
|
rinfo = graph->rinfo = GKmalloc(nvtxs*sizeof(RInfoType), "Mc_Diffusion: rinfo");
|
|
|
|
/******************************************/
|
|
/* construct subdomain connectivity graph */
|
|
/******************************************/
|
|
matrix.nrows = nparts;
|
|
SetUpConnectGraph(graph, &matrix, (idxtype *)workspace);
|
|
nlinks = (matrix.nnzs-nparts) / 2;
|
|
|
|
visited = idxmalloc(matrix.nnzs*2, "visited");
|
|
gvisited = visited + matrix.nnzs;
|
|
|
|
for (pass=0; pass<npasses; pass++) {
|
|
sset(matrix.nnzs*ncon, 0.0, transfer);
|
|
idxset(matrix.nnzs, 0, gvisited);
|
|
idxset(matrix.nnzs, 0, visited);
|
|
iter = nvisited = 0;
|
|
|
|
/*******************************/
|
|
/* compute ncon flow solutions */
|
|
/*******************************/
|
|
for (h=0; h<ncon; h++) {
|
|
sset(nparts, 0.0, solution);
|
|
ComputeLoad(graph, nparts, load, ctrl->tpwgts, h);
|
|
|
|
lbvec[h] = (load[samax(nparts, load)]+1.0/(float)nparts) * (float)nparts;
|
|
|
|
ConjGrad2(&matrix, load, solution, 0.001, workspace);
|
|
ComputeTransferVector(ncon, &matrix, solution, transfer, h);
|
|
}
|
|
|
|
oldlbavg = savg(ncon, lbvec);
|
|
tnswaps = 0;
|
|
maxdiff = 0.0;
|
|
for (i=0; i<nparts; i++) {
|
|
for (j=rowptr[i]; j<rowptr[i+1]; j++) {
|
|
minflow = transfer[j*ncon+samin(ncon, transfer+j*ncon)];
|
|
maxflow = transfer[j*ncon+samax(ncon, transfer+j*ncon)];
|
|
maxdiff = (maxflow - minflow > maxdiff) ? maxflow - minflow : maxdiff;
|
|
}
|
|
}
|
|
|
|
while (nvisited < nlinks) {
|
|
|
|
/******************************************/
|
|
/* compute independent sets of subdomains */
|
|
/******************************************/
|
|
idxset(amax(nparts, npes*2), UNMATCHED, proc2sub);
|
|
CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon);
|
|
|
|
/*****************************/
|
|
/* Set up the packing arrays */
|
|
/*****************************/
|
|
idxset(nparts, UNMATCHED, sub2proc);
|
|
for (i=0; i<npes*2; i++) {
|
|
if (proc2sub[i] == UNMATCHED)
|
|
break;
|
|
|
|
sub2proc[proc2sub[i]] = i/2;
|
|
}
|
|
|
|
iset(npes, 0, rcount);
|
|
for (i=0; i<nvtxs; i++) {
|
|
domain = where[i];
|
|
processor = sub2proc[domain];
|
|
if (processor != UNMATCHED) {
|
|
rcount[processor]++;
|
|
}
|
|
}
|
|
|
|
rdispl[0] = 0;
|
|
for (i=1; i<npes+1; i++)
|
|
rdispl[i] = rdispl[i-1] + rcount[i-1];
|
|
|
|
idxset(nvtxs, UNMATCHED, unpack);
|
|
for (i=0; i<nvtxs; i++) {
|
|
domain = where[i];
|
|
processor = sub2proc[domain];
|
|
if (processor != UNMATCHED) {
|
|
unpack[rdispl[processor]++] = i;
|
|
}
|
|
}
|
|
|
|
for (i=npes; i>0; i--)
|
|
rdispl[i] = rdispl[i-1];
|
|
rdispl[0] = 0;
|
|
|
|
idxset(nvtxs, UNMATCHED, pack);
|
|
for (i=0; i<rdispl[npes]; i++) {
|
|
ASSERTS(unpack[i] != UNMATCHED);
|
|
domain = where[unpack[i]];
|
|
processor = sub2proc[domain];
|
|
if (processor != UNMATCHED) {
|
|
pack[unpack[i]] = i;
|
|
}
|
|
}
|
|
|
|
/*********************/
|
|
/* Compute the flows */
|
|
/*********************/
|
|
if (proc2sub[mype*2] != UNMATCHED) {
|
|
me = proc2sub[mype*2];
|
|
you = proc2sub[mype*2+1];
|
|
ASSERTS(me != you);
|
|
|
|
for (j=rowptr[me]; j<rowptr[me+1]; j++) {
|
|
if (colind[j] == you) {
|
|
visited[j] = 1;
|
|
scopy(ncon, transfer+j*ncon, diff_flows);
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (j=rowptr[you]; j<rowptr[you+1]; j++) {
|
|
if (colind[j] == me) {
|
|
visited[j] = 1;
|
|
for (h=0; h<ncon; h++)
|
|
if (transfer[j*ncon+h] > 0.0)
|
|
diff_flows[h] = -1.0 * transfer[j*ncon+h];
|
|
break;
|
|
}
|
|
}
|
|
|
|
nswaps = 1;
|
|
scopy(ncon, diff_flows, sr_flows);
|
|
|
|
idxset(nvtxs, 0, sbuffer);
|
|
for (i=0; i<nvtxs; i++)
|
|
if (where[i] == me || where[i] == you)
|
|
sbuffer[i] = 1;
|
|
|
|
egraph = ExtractGraph(ctrl, graph, sbuffer, map, rmap);
|
|
|
|
if (egraph != NULL) {
|
|
idxcopy(egraph->nvtxs, egraph->where, diff_where);
|
|
for (j=0; j<egraph->nvtxs; j++)
|
|
ehome[j] = home[map[j]];
|
|
|
|
RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg);
|
|
|
|
if (ncon <= 4) {
|
|
sr_where = egraph->where;
|
|
egraph->where = diff_where;
|
|
|
|
nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, &diff_cost, &diff_lbavg, 1.0/(float)nvtxs);
|
|
|
|
if ((sr_lbavg < diff_lbavg &&
|
|
(diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) ||
|
|
(sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) {
|
|
for (i=0; i<egraph->nvtxs; i++)
|
|
where[map[i]] = sr_where[i];
|
|
}
|
|
else {
|
|
for (i=0; i<egraph->nvtxs; i++)
|
|
where[map[i]] = diff_where[i];
|
|
}
|
|
}
|
|
else {
|
|
for (i=0; i<egraph->nvtxs; i++)
|
|
where[map[i]] = egraph->where[i];
|
|
}
|
|
|
|
GKfree((void **)&egraph->xadj, (void **)&egraph->nvwgt, (void **)&egraph->adjncy, LTERM);
|
|
GKfree((void **)&egraph, LTERM);
|
|
}
|
|
|
|
/**********************/
|
|
/* Pack the flow data */
|
|
/**********************/
|
|
idxset(nvtxs, UNMATCHED, sbuffer);
|
|
for (i=0; i<nvtxs; i++) {
|
|
domain = where[i];
|
|
if (domain == you || domain == me) {
|
|
sbuffer[pack[i]] = where[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/***************************/
|
|
/* Broadcast the flow data */
|
|
/***************************/
|
|
MPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_DATATYPE, (void *)rbuffer, rcount, rdispl, IDX_DATATYPE, ctrl->comm);
|
|
|
|
|
|
/************************/
|
|
/* Unpack the flow data */
|
|
/************************/
|
|
for (i=0; i<rdispl[npes]; i++) {
|
|
if (rbuffer[i] != UNMATCHED) {
|
|
where[unpack[i]] = rbuffer[i];
|
|
}
|
|
}
|
|
|
|
|
|
/******************/
|
|
/* Do other stuff */
|
|
/******************/
|
|
MPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs,
|
|
IDX_DATATYPE, MPI_MAX, ctrl->comm);
|
|
nvisited = idxsum(matrix.nnzs, gvisited)/2;
|
|
tnswaps += GlobalSESum(ctrl, nswaps);
|
|
|
|
if (iter++ == NGD_PASSES)
|
|
break;
|
|
}
|
|
|
|
/*****************************/
|
|
/* perform serial refinement */
|
|
/*****************************/
|
|
Moc_ComputeSerialPartitionParams(graph, nparts, degrees);
|
|
Moc_SerialKWayAdaptRefine(graph, nparts, home, ctrl->ubvec, 10);
|
|
|
|
|
|
/****************************/
|
|
/* check for early breakout */
|
|
/****************************/
|
|
for (h=0; h<ncon; h++) {
|
|
lbvec[h] = (float)(nparts) *
|
|
npwgts[samax_strd(nparts,npwgts+h,ncon)*ncon+h];
|
|
}
|
|
lbavg = savg(ncon, lbvec);
|
|
|
|
done = 0;
|
|
if (
|
|
tnswaps == 0 ||
|
|
lbavg >= oldlbavg ||
|
|
lbavg <= ubavg + 0.035
|
|
)
|
|
done = 1;
|
|
|
|
alldone = GlobalSEMax(ctrl, done);
|
|
if (alldone == 1)
|
|
break;
|
|
}
|
|
|
|
/*******************************************************/
|
|
/* ensure that all subdomains have at least one vertex */
|
|
/*******************************************************/
|
|
/*
|
|
idxset(nparts, 0, match);
|
|
for (i=0; i<nvtxs; i++)
|
|
match[where[i]]++;
|
|
|
|
done = 0;
|
|
while (done == 0) {
|
|
done = 1;
|
|
|
|
me = idxamin(nparts, match);
|
|
if (match[me] == 0) {
|
|
if (ctrl->mype == PE) printf("WARNING: empty subdomain %d in Moc_Diffusion\n", me);
|
|
you = idxamax(nparts, match);
|
|
for (i=0; i<nvtxs; i++) {
|
|
if (where[i] == you) {
|
|
where[i] = me;
|
|
match[you]--;
|
|
match[me]++;
|
|
done = 0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
/******************************/
|
|
/* now free memory and return */
|
|
/******************************/
|
|
GKfree((void **)&load, (void **)&proc2sub, (void **)&sub2proc, (void **)&rcount, LTERM);
|
|
GKfree((void **)&pack, (void **)&workspace, (void **)°rees, (void **)&rinfo, LTERM);
|
|
GKfree((void **)&visited, LTERM);
|
|
graph->gnpwgts = NULL;
|
|
graph->rinfo = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
* This function extracts a subgraph from a graph given an indicator array.
|
|
**************************************************************************/
|
|
GraphType *ExtractGraph(CtrlType *ctrl, GraphType *graph, idxtype *indicator,
|
|
idxtype *map, idxtype *rmap)
|
|
{
|
|
int h, i, j;
|
|
int nvtxs, envtxs, enedges, ncon;
|
|
int vtx, count;
|
|
idxtype *xadj, *vsize, *adjncy, *adjwgt, *where;
|
|
idxtype *exadj, *evsize, *eadjncy, *eadjwgt, *ewhere;
|
|
float *nvwgt, *envwgt;
|
|
GraphType *egraph;
|
|
|
|
nvtxs = graph->nvtxs;
|
|
ncon = graph->ncon;
|
|
xadj = graph->xadj;
|
|
nvwgt = graph->nvwgt;
|
|
vsize = graph->vsize;
|
|
adjncy = graph->adjncy;
|
|
adjwgt = graph->adjwgt;
|
|
where = graph->where;
|
|
|
|
count = 0;
|
|
for (i=0; i<nvtxs; i++) {
|
|
if (indicator[i] == 1) {
|
|
map[count] = i;
|
|
rmap[i] = count;
|
|
count++;
|
|
}
|
|
}
|
|
|
|
if (count == 0) {
|
|
return NULL;
|
|
}
|
|
|
|
/*******************/
|
|
/* allocate memory */
|
|
/*******************/
|
|
egraph = CreateGraph();
|
|
envtxs = egraph->nvtxs = count;
|
|
egraph->ncon = graph->ncon;
|
|
|
|
exadj = egraph->xadj = idxmalloc(envtxs*3+1, "exadj");
|
|
ewhere = egraph->where = exadj + envtxs + 1;
|
|
evsize = egraph->vsize = exadj + 2*envtxs + 1;
|
|
|
|
envwgt = egraph->nvwgt = fmalloc(envtxs*ncon, "envwgt");
|
|
|
|
/************************************************/
|
|
/* compute xadj, where, nvwgt, and vsize arrays */
|
|
/************************************************/
|
|
idxset(envtxs+1, 0, exadj);
|
|
for (i=0; i<envtxs; i++) {
|
|
vtx = map[i];
|
|
|
|
ewhere[i] = where[vtx];
|
|
for (h=0; h<ncon; h++)
|
|
envwgt[i*ncon+h] = nvwgt[vtx*ncon+h];
|
|
|
|
if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION)
|
|
evsize[i] = vsize[vtx];
|
|
|
|
for (j=xadj[vtx]; j<xadj[vtx+1]; j++)
|
|
if (indicator[adjncy[j]] == 1)
|
|
exadj[i]++;
|
|
|
|
}
|
|
MAKECSR(i, envtxs, exadj);
|
|
|
|
/************************************/
|
|
/* compute adjncy and adjwgt arrays */
|
|
/************************************/
|
|
enedges = egraph->nedges = exadj[envtxs];
|
|
eadjncy = egraph->adjncy = idxmalloc(enedges*2, "eadjncy");
|
|
eadjwgt = egraph->adjwgt = eadjncy + enedges;
|
|
|
|
for (i=0; i<envtxs; i++) {
|
|
vtx = map[i];
|
|
for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
|
|
if (indicator[adjncy[j]] == 1) {
|
|
eadjncy[exadj[i]] = rmap[adjncy[j]];
|
|
eadjwgt[exadj[i]++] = adjwgt[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i=envtxs; i>0; i--)
|
|
exadj[i] = exadj[i-1];
|
|
exadj[0] = 0;
|
|
|
|
return egraph;
|
|
}
|