diff --git a/src/image.cpp b/src/image.cpp index 4b181ee8b0..0acef0bceb 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -113,6 +113,11 @@ Image::Image(LAMMPS *lmp, int nmap_caller) : Pointers(lmp) backLightColor[2] = 0.9; random = nullptr; + + // MPI_Gatherv vectors: per-proc receive counts and displacements, allocated in merge() the first time MPI_Gatherv is needed + + recvcounts = nullptr; + displs = nullptr; } /* ---------------------------------------------------------------------- */ @@ -134,6 +139,9 @@ Image::~Image() memory->destroy(rgbcopy); if (random) delete random; + + memory->destroy(recvcounts); + memory->destroy(displs); } /* ---------------------------------------------------------------------- @@ -334,16 +342,37 @@ void Image::merge() // extra SSAO enhancement // bcast full image to all procs // each works on subset of pixels - // gather result back to proc 0 + // MPI_Gather() result back to proc 0 + // use MPI_Gatherv() if subset of pixels is not the same size on every proc; NOTE(review): the MPI_Gather() branch presumes the floating-point split gives equal counts when npixels % nprocs == 0 - confirm if (ssao) { MPI_Bcast(imageBuffer,npixels*3,MPI_BYTE,0,world); MPI_Bcast(surfaceBuffer,npixels*2,MPI_DOUBLE,0,world); MPI_Bcast(depthBuffer,npixels,MPI_DOUBLE,0,world); compute_SSAO(); - int pixelPart = height/nprocs * width*3; - MPI_Gather(imageBuffer+me*pixelPart,pixelPart,MPI_BYTE, - rgbcopy,pixelPart,MPI_BYTE,0,world); + + int pixelstart = 3 * static_cast (1.0*me/nprocs * npixels); + int pixelstop = 3 * static_cast (1.0*(me+1)/nprocs * npixels); + int mypixels = pixelstop - pixelstart; + + if (npixels % nprocs == 0) { + MPI_Gather(imageBuffer+pixelstart,mypixels,MPI_BYTE, + rgbcopy,mypixels,MPI_BYTE,0,world); + + } else { + if (recvcounts == nullptr) { + memory->create(recvcounts,nprocs,"image:recvcounts"); + memory->create(displs,nprocs,"image:displs"); + MPI_Allgather(&mypixels,1,MPI_INT,recvcounts,1,MPI_INT,world); + displs[0] = 0; + for (int i = 1; i < nprocs; i++) + displs[i] = displs[i-1] + recvcounts[i-1]; + } + + MPI_Gatherv(imageBuffer+pixelstart,mypixels,MPI_BYTE, + rgbcopy,recvcounts,displs,MPI_BYTE,0,world); + } + writeBuffer = rgbcopy; } else { writeBuffer = 
imageBuffer; @@ -880,110 +909,117 @@ void Image::compute_SSAO() -tanPerPixel / zoom; int pixelRadius = (int) trunc (SSAORadius / pixelWidth + 0.5); - int x,y,s; - int hPart = height / nprocs; - int index = me * hPart * width; - for (y = me * hPart; y < (me + 1) * hPart; y ++) { - for (x = 0; x < width; x ++, index ++) { - double cdepth = depthBuffer[index]; - if (cdepth < 0) { continue; } + // each proc is assigned a subset of contiguous pixels from the full image + // pixels are contiguous in x (columns within a row), then by row + // index = pixel # from 0 to npixels-1 + // x = column # from 0 to width-1 + // y = row # from 0 to height-1 - double sx = surfaceBuffer[index * 2 + 0]; - double sy = surfaceBuffer[index * 2 + 1]; - double sin_t = -sqrt(sx*sx + sy*sy); + int pixelstart = static_cast (1.0*me/nprocs * npixels); + int pixelstop = static_cast (1.0*(me+1)/nprocs * npixels); - double mytheta = random->uniform() * SSAOJitter; - double ao = 0.0; + for (int index = pixelstart; index < pixelstop; index++) { + int x = index % width; + int y = index / width; - for (s = 0; s < SSAOSamples; s ++) { - double hx = cos(mytheta); - double hy = sin(mytheta); - mytheta += delTheta; + double cdepth = depthBuffer[index]; + if (cdepth < 0) { continue; } - // multiply by z cross surface tangent - // so that dot (aka cos) works here + double sx = surfaceBuffer[index * 2 + 0]; + double sy = surfaceBuffer[index * 2 + 1]; + double sin_t = -sqrt(sx*sx + sy*sy); - double scaled_sin_t = sin_t * (hx*sy + hy*sx); + double mytheta = random->uniform() * SSAOJitter; + double ao = 0.0; - // Bresenham's line algorithm to march over depthBuffer + for (int s = 0; s < SSAOSamples; s ++) { + double hx = cos(mytheta); + double hy = sin(mytheta); + mytheta += delTheta; - int dx = static_cast (hx * pixelRadius); - int dy = static_cast (hy * pixelRadius); - int ex = x + dx; - if (ex < 0) { ex = 0; } if (ex >= width) { ex = width - 1; } - int ey = y + dy; - if (ey < 0) { ey = 0; } if (ey >= height) { 
ey = height - 1; } - double delta; - int small, large; - double lenIncr; - if (fabs(hx) > fabs(hy)) { - small = (hx > 0) ? 1 : -1; - large = (hy > 0) ? width : -width; - delta = fabs(hy / hx); - } else { - small = (hy > 0) ? width : -width; - large = (hx > 0) ? 1 : -1; - delta = fabs(hx / hy); + // multiply by z cross surface tangent + // so that dot (aka cos) works here + + double scaled_sin_t = sin_t * (hx*sy + hy*sx); + + // Bresenham's line algorithm to march over depthBuffer + + int dx = static_cast (hx * pixelRadius); + int dy = static_cast (hy * pixelRadius); + int ex = x + dx; + if (ex < 0) { ex = 0; } if (ex >= width) { ex = width - 1; } + int ey = y + dy; + if (ey < 0) { ey = 0; } if (ey >= height) { ey = height - 1; } + double delta; + int small, large; + double lenIncr; + if (fabs(hx) > fabs(hy)) { + small = (hx > 0) ? 1 : -1; + large = (hy > 0) ? width : -width; + delta = fabs(hy / hx); + } else { + small = (hy > 0) ? width : -width; + large = (hx > 0) ? 1 : -1; + delta = fabs(hx / hy); + } + lenIncr = sqrt (1 + delta * delta) * pixelWidth; + + // initialize with one step + // because the center point doesn't need testing + + int end = ex + ey * width; + int ind = index + small; + double len = lenIncr; + double err = delta; + if (err >= 1.0) { + ind += large; + err -= 1.0; + } + + double minPeak = -1; + double peakLen = 0.0; + int stepsTaken = 1; + while ((small > 0 && ind <= end) || (small < 0 && ind >= end)) { + if (ind < 0 || ind >= (width*height)) { + break; } - lenIncr = sqrt (1 + delta * delta) * pixelWidth; - // initialize with one step - // because the center point doesn't need testing + // cdepth - depthBuffer B/C we want it in the negative z direction - int end = ex + ey * width; - int ind = index + small; - double len = lenIncr; - double err = delta; + if (minPeak < 0 || (depthBuffer[ind] >= 0 && + depthBuffer[ind] < minPeak)) { + minPeak = depthBuffer[ind]; + peakLen = len; + } + ind += small; + len += lenIncr; + err += delta; if (err >= 
1.0) { ind += large; err -= 1.0; } - - double minPeak = -1; - double peakLen = 0.0; - int stepsTaken = 1; - while ((small > 0 && ind <= end) || (small < 0 && ind >= end)) { - if (ind < 0 || ind >= (width*height)) { - break; - } - - // cdepth - depthBuffer B/C we want it in the negative z direction - - if (minPeak < 0 || (depthBuffer[ind] >= 0 && - depthBuffer[ind] < minPeak)) { - minPeak = depthBuffer[ind]; - peakLen = len; - } - ind += small; - len += lenIncr; - err += delta; - if (err >= 1.0) { - ind += large; - err -= 1.0; - } - stepsTaken ++; - } - - if (peakLen > 0) { - double h = atan ((cdepth - minPeak) / peakLen); - ao += saturate(sin (h) - scaled_sin_t); - } else { - ao += saturate(-scaled_sin_t); - } + stepsTaken ++; } - ao /= (double)SSAOSamples; - double c[3]; - c[0] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 0]); - c[1] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 1]); - c[2] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 2]); - c[0] *= (1.0 - ao); - c[1] *= (1.0 - ao); - c[2] *= (1.0 - ao); - imageBuffer[index * 3 + 0] = (int) c[0]; - imageBuffer[index * 3 + 1] = (int) c[1]; - imageBuffer[index * 3 + 2] = (int) c[2]; + if (peakLen > 0) { + double h = atan ((cdepth - minPeak) / peakLen); + ao += saturate(sin (h) - scaled_sin_t); + } else { + ao += saturate(-scaled_sin_t); + } } + ao /= (double)SSAOSamples; + + double c[3]; + c[0] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 0]); + c[1] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 1]); + c[2] = (double) (*(unsigned char *) &imageBuffer[index * 3 + 2]); + c[0] *= (1.0 - ao); + c[1] *= (1.0 - ao); + c[2] *= (1.0 - ao); + imageBuffer[index * 3 + 0] = (int) c[0]; + imageBuffer[index * 3 + 1] = (int) c[1]; + imageBuffer[index * 3 + 2] = (int) c[2]; } } diff --git a/src/image.h b/src/image.h index 7df81425d9..1de455d4bd 100644 --- a/src/image.h +++ b/src/image.h @@ -73,6 +73,10 @@ class Image : protected Pointers { double *depthcopy,*surfacecopy; unsigned 
char *imageBuffer,*rgbcopy,*writeBuffer; + // MPI_Gatherv receive counts and displacements (one entry per proc) + + int *recvcounts,*displs; + // constant view params double FOV;