diff --git a/src/irregular.cpp b/src/irregular.cpp index 3947001541..fe08e04be8 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -297,11 +297,9 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) // setup for collective comm // work1 = 1 for procs I send a message to, not including self - // work2 = 1 for all procs, used for ReduceScatter for (i = 0; i < nprocs; i++) { work1[i] = 0; - work2[i] = 1; } for (i = 0; i < n; i++) work1[proclist[i]] = 1; work1[me] = 0; @@ -318,7 +316,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); nrecv_proc = work2[me]; #else - MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); + MPI_Reduce_scatter_block(work1,&nrecv_proc,1,MPI_INT,MPI_SUM,world); #endif #endif @@ -395,7 +393,9 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) sendmax_proc = 0; for (i = 0; i < nsend_proc; i++) { - MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world); + MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock + MPI_Isend(&length_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq); + MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion sendmax_proc = MAX(sendmax_proc,length_send[i]); } @@ -543,11 +543,9 @@ int Irregular::create_data(int n, int *proclist, int sortflag) // setup for collective comm // work1 = 1 for procs I send a message to, not including self - // work2 = 1 for all procs, used for ReduceScatter for (i = 0; i < nprocs; i++) { work1[i] = 0; - work2[i] = 1; } for (i = 0; i < n; i++) work1[proclist[i]] = 1; work1[me] = 0; @@ -564,7 +562,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag) MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); nrecv_proc = work2[me]; #else - MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); + MPI_Reduce_scatter_block(work1,&nrecv_proc,1,MPI_INT,MPI_SUM,world); #endif #endif @@ -641,7 +639,9 @@ int Irregular::create_data(int n, int *proclist, int sortflag) sendmax_proc = 0; for (i = 0; i < nsend_proc; i++) { - MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world); + MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock + MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq); + MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion sendmax_proc = MAX(sendmax_proc,num_send[i]); }