mirror of
https://develop.openfoam.com/Development/openfoam.git
synced 2025-11-28 03:28:01 +00:00
343 lines
8.8 KiB
C
343 lines
8.8 KiB
C
/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * balancemylink.c
 *
 * This file contains code that implements the edge-based FM refinement
 *
 * Started 7/23/97
 * George
 *
 * $Id: balancemylink.c,v 1.2 2003/07/21 17:18:48 karypis Exp $
 */
|
|
|
|
#include <parmetislib.h>
|
|
/* NOTE(review): PE is not referenced anywhere in the portion of this file
 * shown here; presumably a processor rank once used to gate debug output --
 * confirm before removing. */
#define PE 0
|
|
|
|
/*************************************************************************
|
|
* This function performs an edge-based FM refinement
|
|
**************************************************************************/
|
|
int BalanceMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me,
|
|
int you, float *flows, float maxdiff, float *diff_cost, float *diff_lbavg,
|
|
float avgvwgt)
|
|
{
|
|
int h, i, ii, j, k;
|
|
int nvtxs, ncon;
|
|
int nqueues, minval, maxval, higain, vtx, edge, totalv;
|
|
int from, to, qnum, index, nchanges, cut, tmp;
|
|
int pass, nswaps, nmoves, multiplier;
|
|
idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id;
|
|
idxtype *hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes;
|
|
float *nvwgt, lbvec[MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2], my_wgt[MAXNCON];
|
|
float newgain, oldgain = 0.0;
|
|
float lbavg, bestflow, mycost;
|
|
float ipc_factor, redist_factor, ftmp;
|
|
FPQueueType *queues;
|
|
int mype;
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mype);
|
|
|
|
nvtxs = graph->nvtxs;
|
|
ncon = graph->ncon;
|
|
xadj = graph->xadj;
|
|
nvwgt = graph->nvwgt;
|
|
vsize = graph->vsize;
|
|
adjncy = graph->adjncy;
|
|
adjwgt = graph->adjwgt;
|
|
where = graph->where;
|
|
ipc_factor = ctrl->ipc_factor;
|
|
redist_factor = ctrl->redist_factor;
|
|
|
|
hval = idxmalloc(nvtxs*7, "hval");
|
|
id = hval + nvtxs;
|
|
ed = hval + nvtxs*2;
|
|
map = hval + nvtxs*3;
|
|
rmap = hval + nvtxs*4;
|
|
myqueue = hval + nvtxs*5;
|
|
changes = hval + nvtxs*6;
|
|
|
|
sset(ncon*2, 0.0, pwgts);
|
|
for (h=0; h<ncon; h++) {
|
|
tpwgts[h] = -1.0 * flows[h];
|
|
tpwgts[ncon+h] = flows[h];
|
|
}
|
|
|
|
for (i=0; i<nvtxs; i++) {
|
|
if (where[i] == me) {
|
|
for (h=0; h<ncon; h++) {
|
|
tpwgts[h] += nvwgt[i*ncon+h];
|
|
pwgts[h] += nvwgt[i*ncon+h];
|
|
}
|
|
}
|
|
else {
|
|
ASSERTS(where[i] == you);
|
|
for (h=0; h<ncon; h++) {
|
|
tpwgts[ncon+h] += nvwgt[i*ncon+h];
|
|
pwgts[ncon+h] += nvwgt[i*ncon+h];
|
|
}
|
|
}
|
|
}
|
|
|
|
/* we don't want any tpwgts to be less than zero */
|
|
for (h=0; h<ncon; h++) {
|
|
if (tpwgts[h] < 0.0) {
|
|
tpwgts[ncon+h] += tpwgts[h];
|
|
tpwgts[h] = 0.0;
|
|
}
|
|
|
|
if (tpwgts[ncon+h] < 0.0) {
|
|
tpwgts[h] += tpwgts[ncon+h];
|
|
tpwgts[ncon+h] = 0.0;
|
|
}
|
|
}
|
|
|
|
/*******************************/
|
|
/* insert vertices into queues */
|
|
/*******************************/
|
|
minval = maxval = 0;
|
|
multiplier = 1;
|
|
for (i=0; i<ncon; i++) {
|
|
multiplier *= (i+1);
|
|
maxval += i*multiplier;
|
|
minval += (ncon-1-i)*multiplier;
|
|
}
|
|
|
|
nqueues = maxval-minval+1;
|
|
nvpq = idxsmalloc(nqueues, 0, "nvpq");
|
|
ptr = idxmalloc(nqueues+1, "ptr");
|
|
inq = idxmalloc(nqueues*2, "inq");
|
|
queues = (FPQueueType *)(GKmalloc(sizeof(FPQueueType)*nqueues*2, "queues"));
|
|
|
|
for (i=0; i<nvtxs; i++)
|
|
hval[i] = Moc_HashVwgts(ncon, nvwgt+i*ncon) - minval;
|
|
|
|
for (i=0; i<nvtxs; i++)
|
|
nvpq[hval[i]]++;
|
|
|
|
ptr[0] = 0;
|
|
for (i=0; i<nqueues; i++)
|
|
ptr[i+1] = ptr[i] + nvpq[i];
|
|
|
|
for (i=0; i<nvtxs; i++) {
|
|
map[i] = ptr[hval[i]];
|
|
rmap[ptr[hval[i]]++] = i;
|
|
}
|
|
|
|
for (i=nqueues-1; i>0; i--)
|
|
ptr[i] = ptr[i-1];
|
|
ptr[0] = 0;
|
|
|
|
/* initialize queues */
|
|
for (i=0; i<nqueues; i++)
|
|
if (nvpq[i] > 0) {
|
|
FPQueueInit(queues+i, nvpq[i]);
|
|
FPQueueInit(queues+i+nqueues, nvpq[i]);
|
|
}
|
|
|
|
/* compute internal/external degrees */
|
|
idxset(nvtxs, 0, id);
|
|
idxset(nvtxs, 0, ed);
|
|
for (j=0; j<nvtxs; j++)
|
|
for (k=xadj[j]; k<xadj[j+1]; k++)
|
|
if (where[adjncy[k]] == where[j])
|
|
id[j] += adjwgt[k];
|
|
else
|
|
ed[j] += adjwgt[k];
|
|
|
|
nswaps = 0;
|
|
for (pass=0; pass<N_MOC_BAL_PASSES; pass++) {
|
|
idxset(nvtxs, -1, myqueue);
|
|
idxset(nqueues*2, 0, inq);
|
|
|
|
/* insert vertices into correct queues */
|
|
for (j=0; j<nvtxs; j++) {
|
|
index = (where[j] == me) ? 0 : nqueues;
|
|
|
|
newgain = ipc_factor*(float)(ed[j]-id[j]);
|
|
if (home[j] == me || home[j] == you) {
|
|
if (where[j] == home[j])
|
|
newgain -= redist_factor*(float)vsize[j];
|
|
else
|
|
newgain += redist_factor*(float)vsize[j];
|
|
}
|
|
|
|
FPQueueInsert(queues+hval[j]+index, map[j]-ptr[hval[j]], newgain);
|
|
myqueue[j] = (where[j] == me) ? 0 : 1;
|
|
inq[hval[j]+index]++;
|
|
}
|
|
|
|
/* bestflow = sfavg(ncon, flows); */
|
|
for (j=0, h=0; h<ncon; h++)
|
|
if (fabs(flows[h]) > fabs(flows[j])) j = h;
|
|
bestflow = fabs(flows[j]);
|
|
|
|
nchanges = nmoves = 0;
|
|
for (ii=0; ii<nvtxs/2; ii++) {
|
|
from = -1;
|
|
Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
|
|
&qnum, minval, avgvwgt, maxdiff);
|
|
|
|
/* can't find a vertex in one subdomain, try the other */
|
|
if (from != -1 && qnum == -1) {
|
|
from = (from == me) ? you : me;
|
|
|
|
if (from == me) {
|
|
for (j=0; j<ncon; j++)
|
|
if (flows[j] > avgvwgt)
|
|
break;
|
|
}
|
|
else {
|
|
for (j=0; j<ncon; j++)
|
|
if (flows[j] < -1.0*avgvwgt)
|
|
break;
|
|
}
|
|
|
|
if (j != ncon)
|
|
Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
|
|
&qnum, minval, avgvwgt, maxdiff);
|
|
}
|
|
|
|
if (qnum == -1)
|
|
break;
|
|
|
|
to = (from == me) ? you : me;
|
|
index = (from == me) ? 0 : nqueues;
|
|
higain = FPQueueGetMax(queues+qnum+index);
|
|
inq[qnum+index]--;
|
|
ASSERTS(higain != -1);
|
|
|
|
/*****************/
|
|
/* make the swap */
|
|
/*****************/
|
|
vtx = rmap[higain+ptr[qnum]];
|
|
myqueue[vtx] = -1;
|
|
where[vtx] = to;
|
|
nswaps++;
|
|
nmoves++;
|
|
|
|
/* update the flows */
|
|
for (j=0; j<ncon; j++)
|
|
flows[j] += (to == me) ? nvwgt[vtx*ncon+j] : -1.0*nvwgt[vtx*ncon+j];
|
|
|
|
/* ftmp = sfavg(ncon, flows); */
|
|
for (j=0, h=0; h<ncon; h++)
|
|
if (fabs(flows[h]) > fabs(flows[j])) j = h;
|
|
ftmp = fabs(flows[j]);
|
|
|
|
if (ftmp < bestflow) {
|
|
bestflow = ftmp;
|
|
nchanges = 0;
|
|
}
|
|
else {
|
|
changes[nchanges++] = vtx;
|
|
}
|
|
|
|
SWAP(id[vtx], ed[vtx], tmp);
|
|
|
|
for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
|
|
edge = adjncy[j];
|
|
|
|
/* must compute oldgain before changing id/ed */
|
|
if (myqueue[edge] != -1) {
|
|
oldgain = ipc_factor*(float)(ed[edge]-id[edge]);
|
|
if (home[edge] == me || home[edge] == you) {
|
|
if (where[edge] == home[edge])
|
|
oldgain -= redist_factor*(float)vsize[edge];
|
|
else
|
|
oldgain += redist_factor*(float)vsize[edge];
|
|
}
|
|
}
|
|
|
|
tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
|
|
INC_DEC(id[edge], ed[edge], tmp);
|
|
|
|
if (myqueue[edge] != -1) {
|
|
newgain = ipc_factor*(float)(ed[edge]-id[edge]);
|
|
if (home[edge] == me || home[edge] == you) {
|
|
if (where[edge] == home[edge])
|
|
newgain -= redist_factor*(float)vsize[edge];
|
|
else
|
|
newgain += redist_factor*(float)vsize[edge];
|
|
}
|
|
|
|
FPQueueUpdate(queues+hval[edge]+(nqueues*myqueue[edge]),
|
|
map[edge]-ptr[hval[edge]], oldgain, newgain);
|
|
}
|
|
}
|
|
}
|
|
|
|
/****************************/
|
|
/* now go back to best flow */
|
|
/****************************/
|
|
nswaps -= nchanges;
|
|
nmoves -= nchanges;
|
|
for (i=0; i<nchanges; i++) {
|
|
vtx = changes[i];
|
|
from = where[vtx];
|
|
where[vtx] = to = (from == me) ? you : me;
|
|
|
|
SWAP(id[vtx], ed[vtx], tmp);
|
|
for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
|
|
edge = adjncy[j];
|
|
tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
|
|
INC_DEC(id[edge], ed[edge], tmp);
|
|
}
|
|
}
|
|
|
|
for (i=0; i<nqueues; i++) {
|
|
if (nvpq[i] > 0) {
|
|
FPQueueReset(queues+i);
|
|
FPQueueReset(queues+i+nqueues);
|
|
}
|
|
}
|
|
|
|
if (nmoves == 0)
|
|
break;
|
|
}
|
|
|
|
/***************************/
|
|
/* compute 2-way imbalance */
|
|
/***************************/
|
|
sset(ncon, 0.0, my_wgt);
|
|
for (i=0; i<nvtxs; i++)
|
|
if (where[i] == me)
|
|
for (h=0; h<ncon; h++)
|
|
my_wgt[h] += nvwgt[i*ncon+h];
|
|
|
|
for (i=0; i<ncon; i++) {
|
|
ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
|
|
if (ftmp != 0.0)
|
|
lbvec[i] = fabs(my_wgt[i]-tpwgts[i]) / ftmp;
|
|
else
|
|
lbvec[i] = 0.0;
|
|
}
|
|
lbavg = savg(ncon, lbvec);
|
|
*diff_lbavg = lbavg;
|
|
|
|
/****************/
|
|
/* compute cost */
|
|
/****************/
|
|
cut = totalv = 0;
|
|
for (i=0; i<nvtxs; i++) {
|
|
if (where[i] != home[i])
|
|
totalv += vsize[i];
|
|
|
|
for (j=xadj[i]; j<xadj[i+1]; j++)
|
|
if (where[adjncy[j]] != where[i])
|
|
cut += adjwgt[j];
|
|
}
|
|
cut /= 2;
|
|
mycost = cut*ipc_factor + totalv*redist_factor;
|
|
*diff_cost = mycost;
|
|
|
|
/* free memory */
|
|
for (i=0; i<nqueues; i++)
|
|
if (nvpq[i] > 0) {
|
|
FPQueueFree(queues+i);
|
|
FPQueueFree(queues+i+nqueues);
|
|
}
|
|
|
|
GKfree((void **)&hval, (void **)&nvpq, (void **)&ptr, (void **)&inq, (void **)&queues, LTERM);
|
|
return nswaps;
|
|
}
|
|
|