From bac07b76cef28705d599842e6b92e5e7d07f0b04 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 14 Dec 2011 16:18:15 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7364
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/comm.cpp    |   8 ++--
 src/procmap.cpp | 120 ++++++++++++++++++++++++++++++++++++++++--------
 src/procmap.h   |  12 ++---
 3 files changed, 112 insertions(+), 28 deletions(-)

Review notes (this area, between the "---" marker and the first diff
header, is not part of the commit message or of the applied changes):

  * Two-level xyz_map(): icore/jcore/kcore are computed as the offset of
    i/j/k inside its node, i.e. i - inode*coregrid[0] (and likewise for
    j and k).  The previous expressions multiplied by the variable being
    assigned, which read it before it was initialized.
  * cull_other(): the loop index now advances for every factor that is
    kept, including when other_style is not MULTIPLE, so the loop always
    terminates.  Styles other than MULTIPLE cull nothing.

diff --git a/src/comm.cpp b/src/comm.cpp
index 1ee622cf67..774d00a23f 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -207,14 +207,14 @@ void Comm::set_proc_grid()
 
   } else if (gridflag == TWOLEVEL) {
     if (mapflag == CART)
-      pmap->cart_map(0,procgrid,coregrid,myloc,procneigh,grid2proc);
+      pmap->cart_map(0,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == CARTREORDER)
-      pmap->cart_map(1,procgrid,coregrid,myloc,procneigh,grid2proc);
+      pmap->cart_map(1,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == XYZ)
-      pmap->xyz_map(xyz,procgrid,coregrid,myloc,procneigh,grid2proc);
+      pmap->xyz_map(xyz,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == NUMA) {
-    pmap->numa_map(coregrid,myloc,procneigh,grid2proc);
+    pmap->numa_map(0,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_map(procgrid,myloc,procneigh,grid2proc);
diff --git a/src/procmap.cpp b/src/procmap.cpp
index 1893fb9757..b3d02e59d9 100644
--- a/src/procmap.cpp
+++ b/src/procmap.cpp
@@ -374,9 +374,24 @@ void ProcMap::cart_map(int reorder, int *procgrid,
    MPI may do layout in machine-optimized fashion
 ------------------------------------------------------------------------- */
 
-void ProcMap::cart_map(int reorder, int *procgrid, int *coregrid,
+void ProcMap::cart_map(int reorder, int *procgrid, int ncores, int *coregrid,
                        int *myloc, int procneigh[3][2], int ***grid2proc)
 {
+  // setup NUMA params that numa_grid() sets up
+
+  int me;
+  MPI_Comm_rank(world,&me);
+
+  procs_per_node = ncores;
+  procs_per_numa = ncores;
+  node_id = me/ncores;
+  nodegrid[0] = procgrid[0] / coregrid[0];
+  nodegrid[1] = procgrid[1] / coregrid[1];
+  nodegrid[2] = procgrid[2] / coregrid[2];
+
+  // now can use numa_map() to perform mapping
+
+  numa_map(reorder,coregrid,myloc,procneigh,grid2proc);
 }
 
 /* ----------------------------------------------------------------------
@@ -393,7 +408,6 @@ void ProcMap::xyz_map(char *xyz, int *procgrid,
   for (i = 0; i < procgrid[0]; i++)
     for (j = 0; j < procgrid[1]; j++)
       for (k = 0; k < procgrid[2]; k++) {
-        grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
         if (xyz[0] == 'x' && xyz[1] == 'y' && xyz[2] == 'z')
           grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
         else if (xyz[0] == 'x' && xyz[1] == 'z' && xyz[2] == 'y')
@@ -412,6 +426,8 @@ void ProcMap::xyz_map(char *xyz, int *procgrid,
         }
       }
 
+  // proc IDs of neighbors
+
   int minus,plus;
   grid_shift(myloc[0],procgrid[0],minus,plus);
   procneigh[0][0] = grid2proc[minus][myloc[1]][myloc[2]];
@@ -431,35 +447,59 @@ void ProcMap::xyz_map(char *xyz, int *procgrid,
    respect sub-grid of cores within each node
 ------------------------------------------------------------------------- */
 
-void ProcMap::xyz_map(char *xyz, int *procgrid, int *coregrid,
+void ProcMap::xyz_map(char *xyz, int *procgrid, int ncores, int *coregrid,
                       int *myloc, int procneigh[3][2], int ***grid2proc)
 {
   int me;
   MPI_Comm_rank(world,&me);
 
-  int i,j,k;
+  nodegrid[0] = procgrid[0] / coregrid[0];
+  nodegrid[1] = procgrid[1] / coregrid[1];
+  nodegrid[2] = procgrid[2] / coregrid[2];
+
+  int i,j,k,inode,jnode,knode,icore,jcore,kcore;
   for (i = 0; i < procgrid[0]; i++)
     for (j = 0; j < procgrid[1]; j++)
       for (k = 0; k < procgrid[2]; k++) {
-        grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
+        inode = i/coregrid[0];
+        jnode = j/coregrid[1];
+        knode = k/coregrid[2];
+        icore = i - inode*coregrid[0];
+        jcore = j - jnode*coregrid[1];
+        kcore = k - knode*coregrid[2];
+
         if (xyz[0] == 'x' && xyz[1] == 'y' && xyz[2] == 'z')
-          grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
+          grid2proc[i][j][k] = ncores *
+            (knode*nodegrid[1]*nodegrid[0] + jnode*nodegrid[0] + inode) +
+            (kcore*coregrid[1]*coregrid[0] + jcore*coregrid[0] + icore);
         else if (xyz[0] == 'x' && xyz[1] == 'z' && xyz[2] == 'y')
-          grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i;
+          grid2proc[i][j][k] = ncores *
+            (jnode*nodegrid[2]*nodegrid[0] + knode*nodegrid[0] + inode) +
+            (jcore*coregrid[2]*coregrid[0] + kcore*coregrid[0] + icore);
         else if (xyz[0] == 'y' && xyz[1] == 'x' && xyz[2] == 'z')
-          grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j;
+          grid2proc[i][j][k] = ncores *
+            (knode*nodegrid[0]*nodegrid[1] + inode*nodegrid[1] + jnode) +
+            (kcore*coregrid[0]*coregrid[1] + icore*coregrid[1] + jcore);
         else if (xyz[0] == 'y' && xyz[1] == 'z' && xyz[2] == 'x')
-          grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j;
+          grid2proc[i][j][k] = ncores *
+            (inode*nodegrid[2]*nodegrid[1] + knode*nodegrid[1] + jnode) +
+            (icore*coregrid[2]*coregrid[1] + kcore*coregrid[1] + jcore);
         else if (xyz[0] == 'z' && xyz[1] == 'x' && xyz[2] == 'y')
-          grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k;
+          grid2proc[i][j][k] = ncores *
+            (jnode*nodegrid[0]*nodegrid[2] + inode*nodegrid[2] + knode) +
+            (jcore*coregrid[0]*coregrid[2] + icore*coregrid[2] + kcore);
         else if (xyz[0] == 'z' && xyz[1] == 'y' && xyz[2] == 'x')
-          grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k;
+          grid2proc[i][j][k] = ncores *
+            (inode*nodegrid[1]*nodegrid[2] + jnode*nodegrid[2] + knode) +
+            (icore*coregrid[1]*coregrid[2] + jcore*coregrid[2] + kcore);
 
         if (grid2proc[i][j][k] == me) {
           myloc[0] = i; myloc[1] = j, myloc[2] = k;
         }
       }
 
+  // proc IDs of neighbors
+
   int minus,plus;
   grid_shift(myloc[0],procgrid[0],minus,plus);
   procneigh[0][0] = grid2proc[minus][myloc[1]][myloc[2]];
@@ -478,7 +518,7 @@ void ProcMap::xyz_map(char *xyz, int *procgrid, int *coregrid,
    map processors to 3d grid in 2-level NUMA ordering
 ------------------------------------------------------------------------- */
 
-void ProcMap::numa_map(int *numagrid,
+void ProcMap::numa_map(int reorder, int *numagrid,
                        int *myloc, int procneigh[3][2], int ***grid2proc)
 {
   // setup a per node communicator and find rank within
@@ -502,9 +542,7 @@ void ProcMap::numa_map(int *numagrid,
   MPI_Comm_split(world,numa_rank,0,&numa_leaders);
 
   // use the MPI Cartesian routines to map the nodes to the grid
-  // could implement xyz mapflag as in non-NUMA case?
 
-  int reorder = 0;
   int periods[3];
   periods[0] = periods[1] = periods[2] = 1;
   MPI_Comm cartesian;
@@ -581,6 +619,8 @@ void ProcMap::custom_map(int *procgrid,
     }
   }
 
+  // proc IDs of neighbors
+
   int minus,plus;
   grid_shift(myloc[0],procgrid[0],minus,plus);
   procneigh[0][0] = grid2proc[minus][myloc[1]][myloc[2]];
@@ -698,39 +738,83 @@ int ProcMap::factor(int n, int **factors)
 }
 
 /* ----------------------------------------------------------------------
+   create N1*N2 new factors (procs) from factors1 (nodes) and factors2 (cores)
+   store index of corresponding core factors in factors[][3]
 ------------------------------------------------------------------------- */
 
 int ProcMap::combine_factors(int n1, int **factors1, int n2, int **factors2,
                              int **factors)
 {
   int m = 0;
-
+  for (int i = 0; i < n1; i++)
+    for (int j = 0; j < n2; j++) {
+      factors[m][0] = factors1[i][0]*factors2[j][0];
+      factors[m][1] = factors1[i][1]*factors2[j][1];
+      factors[m][2] = factors1[i][2]*factors2[j][2];
+      factors[m][3] = j;
+      m++;
+    }
   return n1*n2;
 }
 
 /* ----------------------------------------------------------------------
+   remove any factors where Pz != 1 for 2d
 ------------------------------------------------------------------------- */
 
 int ProcMap::cull_2d(int n, int **factors, int m)
 {
-  return 0;
+  int i = 0;
+  while (i < n) {
+    if (factors[i][2] != 1) {
+      for (int j = 0; j < m; j++) factors[i][j] = factors[n-1][j];
+      n--;
+    } else i++;
+  }
+  return n;
 }
 
 /* ----------------------------------------------------------------------
+   remove any factors that do not match non-zero user_factors Px,Py,Pz
 ------------------------------------------------------------------------- */
 
 int ProcMap::cull_user(int n, int **factors, int m, int *user_factors)
 {
-  return 0;
+  int i = 0;
+  while (i < n) {
+    int flag = 0;
+    if (user_factors[0] && factors[i][0] != user_factors[0]) flag = 1;
+    if (user_factors[1] && factors[i][1] != user_factors[1]) flag = 1;
+    if (user_factors[2] && factors[i][2] != user_factors[2]) flag = 1;
+    if (flag) {
+      for (int j = 0; j < m; j++) factors[i][j] = factors[n-1][j];
+      n--;
+    } else i++;
+  }
+  return n;
 }
 
 /* ----------------------------------------------------------------------
+   remove any factors that do not match settings from other partition
+   MULTIPLE = other Px,Py,Pz must be multiple of my Px,Py,Pz
 ------------------------------------------------------------------------- */
 
 int ProcMap::cull_other(int n, int **factors, int m,
                         int other_style, int *other_grid)
 {
-  return 0;
+  int i = 0;
+  while (i < n) {
+    int flag = 0;
+    if (other_style == MULTIPLE) {
+      if (other_grid[0] % factors[i][0]) flag = 1;
+      if (other_grid[1] % factors[i][1]) flag = 1;
+      if (other_grid[2] % factors[i][2]) flag = 1;
+    }
+    if (flag) {
+      for (int j = 0; j < m; j++) factors[i][j] = factors[n-1][j];
+      n--;
+    } else i++;
+  }
+  return n;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/procmap.h b/src/procmap.h
index 2704a4e339..8884f70770 100644
--- a/src/procmap.h
+++ b/src/procmap.h
@@ -27,18 +27,18 @@ class ProcMap : protected Pointers {
   void numa_grid(int, int *, int *, int *);
   void custom_grid(char *, int, int *, int *);
   void cart_map(int, int *, int *, int [3][2], int ***);
-  void cart_map(int, int *, int *, int *, int [3][2], int ***);
+  void cart_map(int, int *, int, int *, int *, int [3][2], int ***);
   void xyz_map(char *, int *, int *, int [3][2], int ***);
-  void xyz_map(char *, int *, int *, int *, int [3][2], int ***);
-  void numa_map(int *, int *, int [3][2], int ***);
+  void xyz_map(char *, int *, int, int *, int *, int [3][2], int ***);
+  void numa_map(int, int *, int *, int [3][2], int ***);
   void custom_map(int *, int *, int [3][2], int ***);
   void output(char *, int *, int ***);
 
  private:
-  int nodegrid[3];                  // NUMA params
-  int node_id;
-  int procs_per_node;
+  int procs_per_node;               // NUMA params
   int procs_per_numa;
+  int node_id;                      // which node I am in
+  int nodegrid[3];                  // 3d grid of nodes
 
   int **cmap;                       // info in custom grid file
 