/* * serial.c * * This file contains code that implements k-way refinement * * Started 7/28/97 * George * * $Id: serial.c,v 1.2 2003/07/21 17:18:53 karypis Exp $ * */ #include /************************************************************************* * This function performs k-way refinement **************************************************************************/ void Moc_SerialKWayAdaptRefine(GraphType *graph, int nparts, idxtype *home, float *orgubvec, int npasses) { int i, ii, iii, j, k; int nvtxs, ncon, pass, nmoves, myndegrees; int from, me, myhome, to, oldcut, gain, tmp; idxtype *xadj, *adjncy, *adjwgt; idxtype *where; EdgeType *mydegrees; RInfoType *rinfo, *myrinfo; float *npwgts, *nvwgt, *minwgt, *maxwgt, ubvec[MAXNCON]; int gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home; int zero_gain, better_balance_ft, better_balance_tt; KeyValueType *cand; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; rinfo = graph->rinfo; npwgts = graph->gnpwgts; /* Setup the weight intervals of the various subdomains */ cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); minwgt = fmalloc(nparts*ncon, "minwgt"); maxwgt = fmalloc(nparts*ncon, "maxwgt"); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; imincut; for (i=0; ied >= myrinfo->id) { from = where[i]; myhome = home[i]; nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; mydegrees = myrinfo->degrees; myndegrees = myrinfo->ndegrees; for (k=0; kid; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) { break; } } /* break out if you did not find a candidate */ if (k == myndegrees) continue; for (j=k+1; j mydegrees[k].ewgt); fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon); better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, npwgts+to*ncon,nvwgt,ubvec); better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon, npwgts+to*ncon,nvwgt,ubvec); if ( (gain_is_greater && (fit_in_to || better_balance_ft) ) || (gain_is_same && ( (fit_in_to && going_home) || better_balance_tt ) ) ) { k = j; } } to = mydegrees[k].edge; going_home = (myhome == to); zero_gain = (mydegrees[k].ewgt == myrinfo->id); fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon, maxwgt+from*ncon); better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, npwgts+to*ncon,nvwgt,ubvec); if (zero_gain && !going_home && !better_balance_ft && fit_in_from) continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ graph->mincut -= mydegrees[k].ewgt-myrinfo->id; /* Update where, weight, and ID/ED information of the vertex you moved */ saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; myrinfo->ed += myrinfo->id-mydegrees[k].ewgt; SWAP(myrinfo->id, mydegrees[k].ewgt, tmp); if (mydegrees[k].ewgt == 0) { myrinfo->ndegrees--; mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; } else mydegrees[k].edge = from; /* Update the degrees of adjacent vertices */ for (j=xadj[i]; jdegrees; if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (me == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution of the ed from 'from' */ if (me != from) { for (k=0; kndegrees; k++) { if (mydegrees[k].edge == from) { if (mydegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; } else mydegrees[k].ewgt -= adjwgt[j]; break; } } } /* Add contribution of the ed to 'to' */ if (me != to) { for (k=0; kndegrees; k++) { if (mydegrees[k].edge == to) { mydegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { mydegrees[myrinfo->ndegrees].edge = to; mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } nmoves++; } } if (graph->mincut == oldcut) break; } GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM); return; } /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Moc_ComputeSerialPartitionParams(GraphType *graph, int nparts, EdgeType *degrees) { int i, j, k; int nvtxs, nedges, ncon, mincut, me, other; idxtype *xadj, *adjncy, *adjwgt, *where; RInfoType *rinfo, *myrinfo; EdgeType *mydegrees; float *nvwgt, *npwgts; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; rinfo = graph->rinfo; npwgts = sset(ncon*nparts, 0.0, graph->gnpwgts); /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nedges = mincut = 0; for (i=0; iid = myrinfo->ed = myrinfo->ndegrees = 0; myrinfo->degrees = degrees + nedges; nedges += xadj[i+1]-xadj[i]; for (j=xadj[i]; jid += adjwgt[j]; } else { myrinfo->ed += adjwgt[j]; } } mincut += myrinfo->ed; /* Time to compute the particular external degrees */ if (myrinfo->ed > 0) { mydegrees = myrinfo->degrees; for (j=xadj[i]; jndegrees; k++) { if (mydegrees[k].edge == other) { mydegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { mydegrees[myrinfo->ndegrees].edge = other; mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } graph->mincut = mincut/2; return; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ int AreAllHVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float *limit) { int i; for (i=0; i limit[i]) return 0; return 1; } /************************************************************************* * This function computes the load imbalance over all the constrains * For now assume that we just want balanced partitionings **************************************************************************/ void ComputeHKWayLoadImbalance(int ncon, int nparts, float *npwgts, float *lbvec) { int i, j; float max; for (i=0; i max) max = npwgts[j*ncon+i]; } lbvec[i] = max*nparts; } } /************************************************************** * This subroutine remaps a partitioning on a single processor **************************************************************/ void SerialRemap(GraphType *graph, int nparts, idxtype *base, idxtype *scratch, idxtype *remap, float *tpwgts) { int i, ii, j, k; int nvtxs, nmapped, max_mult; int from, to, current_from, smallcount, bigcount; KeyValueType *flowto, *bestflow; KeyKeyValueType *sortvtx; idxtype *vsize, *htable, *map, *rowmap; nvtxs = graph->nvtxs; vsize = graph->vsize; max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts); sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx"); flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto"); bestflow = flowto+nparts; map = htable = idxsmalloc(nparts*2, -1, "htable"); rowmap = map+nparts; for (i=0; i current_from) { /* reset the hash table */ for (j=0; jncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; jncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; ikey1 > second->key1) return 1; if (first->key1 < second->key1) return -1; if (first->key2 < second->key2) return 1; if (first->key2 > second->key2) return -1; return 0; } /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Moc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses) { int i, ii, j, k; int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[MAXNCON*2]; KeyValueType *cand; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; imincut; for (i=0; ignvtxs; for (i=0; i limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; imincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i= maxdiff) { maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i]; *from = part; *cnum = i; } } } if (*from != -1 && FPQueueGetQSize(&queues[*cnum][*from]) == 0) { /* The desired queue is empty, select a node from that side anyway */ for (i=0; i 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; break; } } for (i++; i max && FPQueueGetQSize(&queues[i][*from]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; } } } /* Check to see if you can focus on the cut */ if (maxdiff <= 0.0 || *from == -1) { maxgain = -100000.0; for (part=0; part<2; part++) { for (i=0; i 0 && FPQueueSeeMaxGain(&queues[i][part]) > maxgain) { maxgain = FPQueueSeeMaxGain(&queues[i][part]); *from = part; *cnum = i; } } } } return; } /************************************************************************* * This function checks if the balance achieved is better than the diff * For now, it uses a 2-norm measure **************************************************************************/ int Serial_BetterBalance(int ncon, float *npwgts, float *tpwgts, float *diff) { int i; float ndiff[MAXNCON]; for (i=0; invtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } for (i=0; imincut; mincutorder = -1; idxset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; invtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; nvwgt = graph->nvwgt; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); /* This is called for initial partitioning so we know from where to pick nodes */ from = 1; to = (from+1)%2; for (i=0; i 0) FPQueueInsert(&parts[qnum[i]][0], i, (float)(ed[i]-id[i])); else FPQueueInsert(&parts[qnum[i]][1], i, (float)(ed[i]-id[i])); } } mincut = graph->mincut; nbnd = graph->gnvtxs; for (nswaps=0; nswaps 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0 && bndptr[k] == -1) { /* It moves in boundary */ FPQueueDelete(&parts[qnum[k]][1], k); FPQueueInsert(&parts[qnum[k]][0], k, (float)(ed[k]-id[k])); } else { /* It must be in the boundary already */ FPQueueUpdate(&parts[qnum[k]][0], k, (float)(oldgain), (float)(ed[k]-id[k])); } } /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i= max && FPQueueGetQSize(&queues[i][0]) + FPQueueGetQSize(&queues[i][1]) > 0) { max = npwgts[from*ncon+i]-tpwgts[i]; cnum = i; } } return cnum; } /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Moc_Serial_Compute2WayPartitionParams(GraphType *graph) { int i, j, me, nvtxs, ncon, nbnd, mincut; idxtype *xadj, *adjncy, *adjwgt; float *nvwgt, *npwgts; idxtype *id, *ed, *where; idxtype *bndptr, *bndind; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = sset(2*ncon, 0.0, graph->gnpwgts); id = idxset(nvtxs, 0, graph->sendind); ed = idxset(nvtxs, 0, graph->recvind); bndptr = idxset(nvtxs, -1, graph->sendptr); bndind = graph->recvptr; /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nbnd = mincut = 0; for (i=0; i 0 || xadj[i] == xadj[i+1]) { mincut += ed[i]; bndptr[i] = nbnd; bndind[nbnd++] = i; } } graph->mincut = mincut/2; graph->gnvtxs = nbnd; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ int Serial_AreAnyVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float *limit) { int i; for (i=0; invtxs; i++) { for (j=graph->xadj[i]; jxadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; } graph->mincut = cut/2; return graph->mincut; } /************************************************************************* * This function computes the TotalV of a serial graph. **************************************************************************/ int ComputeSerialTotalV(GraphType *graph, idxtype *home) { int i; int totalv = 0; for (i=0; invtxs; i++) if (graph->where[i] != home[i]) totalv += (graph->vsize == NULL) ? graph->vwgt[i] : graph->vsize[i]; return totalv; }