813 lines
23 KiB
C++
813 lines
23 KiB
C++
// clang-format off
|
|
/* ----------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
https://www.lammps.org/ Sandia National Laboratories
|
|
LAMMPS development team: developers@lammps.org
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
#include "amoeba_convolution.h"
|
|
|
|
#include "comm.h"
|
|
#include "fft3d_wrap.h"
|
|
#include "grid3d.h"
|
|
#include "memory.h"
|
|
#include "neighbor.h"
|
|
#include "remap_wrap.h"
|
|
#include "timer.h"
|
|
|
|
#include <cstring>
|
|
|
|
using namespace LAMMPS_NS;
|
|
|
|
// DEBUG
|
|
|
|
#define DEBUG_AMOEBA 0
|
|
#if DEBUG_AMOEBA
|
|
char *labels[7] =
|
|
{(char *) "MPOLE_GRID", (char *) "POLAR_GRID",
|
|
(char *) "POLAR_GRIDC", (char *) "DISP_GRID",
|
|
(char *) "INDUCE_GRID", (char *) "INDUCE_GRIDC"};
|
|
|
|
enum{GRIDBRICK_OUT,GRIDBRICK_IN,FFT,CFFT1,CFFT2};
|
|
#endif
|
|
// END DEBUG
|
|
|
|
enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
|
|
|
|
//#define SCALE 1
|
|
#define SCALE 0
|
|
|
|
static constexpr FFT_SCALAR ZEROF = 0.0;
|
|
|
|
/* ----------------------------------------------------------------------
|
|
partition an FFT grid across processors
|
|
both for a brick and FFT x pencil decomposition
|
|
nx,nz,nz = global FFT grid size
|
|
order = size of stencil in each dimension that maps atoms to grid
|
|
adapted from PPPM::set_grid_local()
|
|
------------------------------------------------------------------------- */
|
|
|
|
AmoebaConvolution::AmoebaConvolution(LAMMPS *lmp, Pair *pair,
|
|
int nx_caller, int ny_caller, int nz_caller,
|
|
int order_caller, int which_caller) :
|
|
Pointers(lmp)
|
|
{
|
|
amoeba = pair;
|
|
nx = nx_caller;
|
|
ny = ny_caller;
|
|
nz = nz_caller;
|
|
order = order_caller;
|
|
which = which_caller;
|
|
|
|
flag3d = 1;
|
|
if (which == POLAR_GRIDC || which == INDUCE_GRIDC) flag3d = 0;
|
|
|
|
nfft_global = (bigint) nx * ny * nz;
|
|
|
|
allocate_grid();
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
free all memory
|
|
------------------------------------------------------------------------- */
|
|
|
|
AmoebaConvolution::~AmoebaConvolution()
|
|
{
|
|
deallocate_grid();
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
subset of FFT grids assigned to each proc may have changed
|
|
called by load balancer when proc subdomains are adjusted
|
|
------------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::reset_grid()
|
|
{
|
|
deallocate_grid();
|
|
allocate_grid();
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
allocate all local grid data structs: FFT, Grid3d, Remap
|
|
------------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::allocate_grid()
|
|
{
|
|
// maxdist = max distance outside of subbox my owned atom may be
|
|
// nlower,nupper = stencil size for mapping particles to FFT grid
|
|
|
|
double maxdist = 0.5*neighbor->skin;
|
|
int nlower = -(order-1)/2;
|
|
int nupper = order/2;
|
|
|
|
// Grid3d determines my owned + ghost grid cells
|
|
// ghost cell extent depends on maxdist, nlower, nupper
|
|
|
|
gc = new Grid3d(lmp,world,nx,ny,nz);
|
|
gc->set_distance(maxdist);
|
|
gc->set_stencil_atom(-nlower,nupper);
|
|
|
|
gc->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
|
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
|
|
|
|
int nqty = flag3d ? 1 : 2;
|
|
int ngc_buf1,ngc_buf2;
|
|
|
|
gc->setup_comm(ngc_buf1,ngc_buf2);
|
|
memory->create(gc_buf1,nqty*ngc_buf1,"amoeba:gc_buf1");
|
|
memory->create(gc_buf2,nqty*ngc_buf2,"amoeba:gc_buf2");
|
|
|
|
// x-pencil decomposition of FFT mesh
|
|
// global indices range from 0 to N-1
|
|
// each proc owns entire x-dimension, clumps of columns in y,z dimensions
|
|
// npey_fft,npez_fft = # of procs in y,z dims
|
|
// if nprocs is small enough, proc can own 1 or more entire xy planes,
|
|
// else proc owns 2d sub-blocks of yz plane
|
|
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
|
|
// nlo_fft,nhi_fft = lower/upper limit of the section
|
|
// of the global FFT mesh that I own in x-pencil decomposition
|
|
|
|
int me = comm->me;
|
|
int nprocs = comm->nprocs;
|
|
|
|
int npey_fft,npez_fft;
|
|
if (nz >= nprocs) {
|
|
npey_fft = 1;
|
|
npez_fft = nprocs;
|
|
} else procs2grid2d(nprocs,ny,nz,npey_fft,npez_fft);
|
|
|
|
int me_y = me % npey_fft;
|
|
int me_z = me / npey_fft;
|
|
|
|
nxlo_fft = 0;
|
|
nxhi_fft = nx - 1;
|
|
nylo_fft = me_y*ny/npey_fft;
|
|
nyhi_fft = (me_y+1)*ny/npey_fft - 1;
|
|
nzlo_fft = me_z*nz/npez_fft;
|
|
nzhi_fft = (me_z+1)*nz/npez_fft - 1;
|
|
|
|
// FFT grid sizes
|
|
// nbrick_owned = owned grid points in brick decomp
|
|
// nbrick_ghosts = owned + ghost grid points in grid decomp
|
|
// nfft_owned = owned grid points in FFT decomp
|
|
// ngrid_either = max of nbrick_onwed and nfft_owned
|
|
// nfft = total FFT grid points
|
|
|
|
nbrick_owned = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
|
|
(nzhi_in-nzlo_in+1);
|
|
nbrick_ghosts = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
|
(nzhi_out-nzlo_out+1);
|
|
nfft_owned = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
|
|
(nzhi_fft-nzlo_fft+1);
|
|
|
|
ngrid_either = MAX(nbrick_owned,nfft_owned);
|
|
|
|
// instantiate FFT and Remap
|
|
|
|
int tmp;
|
|
|
|
fft1 = new FFT3d(lmp,world,nx,ny,nz,
|
|
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
|
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
|
1,0,&tmp,0);
|
|
|
|
fft2 = new FFT3d(lmp,world,nx,ny,nz,
|
|
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
|
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
|
0,0,&tmp,0);
|
|
|
|
remap = new Remap(lmp,world,
|
|
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
|
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
|
nqty,0,0,FFT_PRECISION,0);
|
|
|
|
// memory allocations
|
|
|
|
if (flag3d) {
|
|
memory->create3d_offset(grid_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
|
nxlo_out,nxhi_out,"amoeba:grid_brick");
|
|
grid_brick_start = &grid_brick[nzlo_out][nylo_out][nxlo_out];
|
|
cgrid_brick = nullptr;
|
|
} else {
|
|
memory->create4d_offset_last(cgrid_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
|
nxlo_out,nxhi_out,2,"amoeba:cgrid_brick");
|
|
grid_brick_start = &cgrid_brick[nzlo_out][nylo_out][nxlo_out][0];
|
|
grid_brick = nullptr;
|
|
}
|
|
|
|
memory->create(grid_fft,ngrid_either,"amoeba:grid_fft");
|
|
memory->create(cfft,2*ngrid_either,"amoeba:cfft");
|
|
memory->create(remap_buf,nqty*nfft_owned,"amoeba:remap_buf");
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::deallocate_grid()
|
|
{
|
|
delete gc;
|
|
|
|
memory->destroy(gc_buf1);
|
|
memory->destroy(gc_buf2);
|
|
|
|
delete fft1;
|
|
delete fft2;
|
|
delete remap;
|
|
|
|
memory->destroy3d_offset(grid_brick,nzlo_out,nylo_out,nxlo_out);
|
|
memory->destroy4d_offset_last(cgrid_brick,nzlo_out,nylo_out,nxlo_out);
|
|
memory->destroy(grid_fft);
|
|
memory->destroy(cfft);
|
|
memory->destroy(remap_buf);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
zero brick grid, including ghosts
|
|
can be 3d real or 4d complex array
|
|
return pointer to data in brick grid, caller casts to 3d or 4d
|
|
------------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::zero()
|
|
{
|
|
if (flag3d) return zero_3d();
|
|
return zero_4d();
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::zero_3d()
|
|
{
|
|
if (!grid_brick) return nullptr;
|
|
memset(&(grid_brick[nzlo_out][nylo_out][nxlo_out]),0,
|
|
nbrick_ghosts*sizeof(FFT_SCALAR));
|
|
return (void *) grid_brick;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::zero_4d()
|
|
{
|
|
if (!cgrid_brick) return nullptr;
|
|
memset(&(cgrid_brick[nzlo_out][nylo_out][nxlo_out][0]),0,
|
|
2*nbrick_ghosts*sizeof(FFT_SCALAR));
|
|
return (void *) cgrid_brick;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform pre-convolution grid operations
|
|
can be 3d real or 4d complex array
|
|
return pointer to complex cfft vector
|
|
------------------------------------------------------------------------- */
|
|
|
|
FFT_SCALAR *AmoebaConvolution::pre_convolution()
|
|
{
|
|
if (flag3d) return pre_convolution_3d();
|
|
return pre_convolution_4d();
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform pre-convolution grid operations for 3d grid_brick array
|
|
------------------------------------------------------------------------- */
|
|
|
|
FFT_SCALAR *AmoebaConvolution::pre_convolution_3d()
|
|
{
|
|
int ix,iy,iz,n;
|
|
|
|
// reverse comm for 3d brick grid + ghosts
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_OUT,"PRE Convo / PRE Grid3d");
|
|
#endif
|
|
|
|
gc->reverse_comm(Grid3d::PAIR,amoeba,which,1,sizeof(FFT_SCALAR),
|
|
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_IN,"PRE Convo / POST Grid3d");
|
|
debug_file(GRIDBRICK_IN,"pre.convo.post.grid3d");
|
|
#endif
|
|
|
|
// copy owned 3d brick grid values to FFT grid
|
|
|
|
n = 0;
|
|
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (ix = nxlo_in; ix <= nxhi_in; ix++)
|
|
grid_fft[n++] = grid_brick[iz][iy][ix];
|
|
|
|
// remap FFT grid from brick to x pencil partitioning
|
|
|
|
remap->perform(grid_fft,grid_fft,remap_buf);
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(FFT,"PRE Convo / POST Remap");
|
|
debug_file(FFT,"pre.convo.post.remap");
|
|
#endif
|
|
|
|
// copy real values into complex grid
|
|
|
|
n = 0;
|
|
for (int i = 0; i < nfft_owned; i++) {
|
|
cfft[n++] = grid_fft[i];
|
|
cfft[n++] = ZEROF;
|
|
}
|
|
|
|
double time0,time1;
|
|
|
|
if (timer->has_sync()) MPI_Barrier(world);
|
|
time0 = platform::walltime();
|
|
|
|
// perform forward FFT
|
|
|
|
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
|
time1 = platform::walltime();
|
|
|
|
if (SCALE) {
|
|
FFT_SCALAR scale = 1.0/nfft_global;
|
|
for (int i = 0; i < 2*nfft_owned; i++) cfft[i] *= scale;
|
|
}
|
|
|
|
time_fft += time1 - time0;
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT1,"PRE Convo / POST FFT");
|
|
debug_file(CFFT1,"pre.convo.post.fft");
|
|
#endif
|
|
return cfft;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform pre-convolution grid operations for 4d cgrid_brick array
|
|
------------------------------------------------------------------------- */
|
|
|
|
FFT_SCALAR *AmoebaConvolution::pre_convolution_4d()
|
|
{
|
|
int ix,iy,iz,n;
|
|
|
|
// reverse comm for 4d brick grid + ghosts
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_OUT,"PRE Convo / PRE Grid3d");
|
|
#endif
|
|
|
|
gc->reverse_comm(Grid3d::PAIR,amoeba,which,2,sizeof(FFT_SCALAR),
|
|
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_IN,"PRE Convo / POST Grid3d");
|
|
debug_file(GRIDBRICK_IN,"pre.convo.post.grid3d");
|
|
#endif
|
|
// copy owned 4d brick grid values to FFT grid
|
|
|
|
n = 0;
|
|
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
cfft[n++] = cgrid_brick[iz][iy][ix][0];
|
|
cfft[n++] = cgrid_brick[iz][iy][ix][1];
|
|
}
|
|
|
|
// remap FFT grid from brick to x pencil partitioning
|
|
// NOTE: could just setup FFT to start from brick decomp and skip remap
|
|
|
|
remap->perform(cfft,cfft,remap_buf);
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(FFT,"PRE Convo / POST Remap");
|
|
debug_file(FFT,"pre.convo.post.remap");
|
|
#endif
|
|
|
|
double time0,time1;
|
|
|
|
if (timer->has_sync()) MPI_Barrier(world);
|
|
time0 = platform::walltime();
|
|
|
|
// perform forward FFT
|
|
|
|
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
|
time1 = platform::walltime();
|
|
|
|
if (SCALE) {
|
|
FFT_SCALAR scale = 1.0/nfft_global;
|
|
for (int i = 0; i < 2*nfft_owned; i++) cfft[i] *= scale;
|
|
}
|
|
|
|
time_fft += time1 - time0;
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT1,"PRE Convo / POST FFT");
|
|
debug_file(CFFT1,"pre.convo.post.fft");
|
|
#endif
|
|
return cfft;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform post-convolution grid operations
|
|
can be 3d real or 4d complex array
|
|
return pointer to data in brick grid, caller casts to 3d or 4d
|
|
------------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::post_convolution()
|
|
{
|
|
if (flag3d) return post_convolution_3d();
|
|
return post_convolution_4d();
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform post-convolution grid operations for 3d grid_brick array
|
|
------------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::post_convolution_3d()
|
|
{
|
|
int ix,iy,iz,n;
|
|
|
|
// perform backward FFT
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT1,"POST Convo / PRE FFT");
|
|
debug_file(CFFT1,"post.convo.pre.fft");
|
|
#endif
|
|
|
|
double time0,time1;
|
|
|
|
if (timer->has_sync()) MPI_Barrier(world);
|
|
time0 = platform::walltime();
|
|
|
|
fft2->compute(cfft,cfft,FFT3d::BACKWARD);
|
|
time1 = platform::walltime();
|
|
|
|
time_fft += time1 - time0;
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT2,"POST Convo / POST FFT");
|
|
debug_file(CFFT2,"post.convo.post.fft");
|
|
#endif
|
|
// copy real portion of 1d complex values into 3d real grid
|
|
|
|
n = 0;
|
|
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
grid_brick[iz][iy][ix] = cfft[n];
|
|
n += 2;
|
|
}
|
|
|
|
// forward comm to populate ghost grid values
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_IN,"POST Convo / PRE grid3d");
|
|
debug_file(GRIDBRICK_IN,"post.convo.pre.grid3d");
|
|
#endif
|
|
gc->forward_comm(Grid3d::PAIR,amoeba,which,1,sizeof(FFT_SCALAR),
|
|
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
|
|
|
return (void *) grid_brick;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
perform post-convolution grid operations for 4d cgrid_brick array
|
|
------------------------------------------------------------------------- */
|
|
|
|
void *AmoebaConvolution::post_convolution_4d()
|
|
{
|
|
int ix,iy,iz,n;
|
|
|
|
// perform backward FFT
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT1,"POST Convo / PRE FFT");
|
|
debug_file(CFFT1,"post.convo.pre.fft");
|
|
#endif
|
|
|
|
double time0,time1;
|
|
|
|
if (timer->has_sync()) MPI_Barrier(world);
|
|
time0 = platform::walltime();
|
|
|
|
fft2->compute(cfft,cfft,FFT3d::BACKWARD);
|
|
|
|
time1 = platform::walltime();
|
|
|
|
time_fft += time1 - time0;
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(CFFT2,"POST Convo / POST FFT");
|
|
debug_file(CFFT2,"post.convo.post.fft");
|
|
#endif
|
|
// copy 1d complex values into 4d complex grid
|
|
|
|
n = 0;
|
|
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
cgrid_brick[iz][iy][ix][0] = cfft[n++];
|
|
cgrid_brick[iz][iy][ix][1] = cfft[n++];
|
|
}
|
|
|
|
// forward comm to populate ghost grid values
|
|
|
|
#if DEBUG_AMOEBA
|
|
debug_scalar(GRIDBRICK_IN,"POST Convo / PRE grid3d");
|
|
debug_file(GRIDBRICK_IN,"post.convo.pre.grid3d");
|
|
#endif
|
|
gc->forward_comm(Grid3d::PAIR,amoeba,which,2,sizeof(FFT_SCALAR),
|
|
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
|
|
|
return (void *) cgrid_brick;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
|
|
copy of PPPM::procs2grid2d()
|
|
------------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::procs2grid2d(int nprocs, int nx, int ny, int &px, int &py)
|
|
{
|
|
// loop thru all possible factorizations of nprocs
|
|
// surf = surface area of largest proc sub-domain
|
|
// innermost if test minimizes surface area and surface/volume ratio
|
|
|
|
int bestsurf = 2 * (nx + ny);
|
|
int bestboxx = 0;
|
|
int bestboxy = 0;
|
|
|
|
int boxx,boxy,surf,ipx,ipy;
|
|
|
|
ipx = 1;
|
|
while (ipx <= nprocs) {
|
|
if (nprocs % ipx == 0) {
|
|
ipy = nprocs/ipx;
|
|
boxx = nx/ipx;
|
|
if (nx % ipx) boxx++;
|
|
boxy = ny/ipy;
|
|
if (ny % ipy) boxy++;
|
|
surf = boxx + boxy;
|
|
if (surf < bestsurf ||
|
|
(surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
|
|
bestsurf = surf;
|
|
bestboxx = boxx;
|
|
bestboxy = boxy;
|
|
px = ipx;
|
|
py = ipy;
|
|
}
|
|
}
|
|
ipx++;
|
|
}
|
|
}
|
|
|
|
#if DEBUG_AMOEBA
|
|
/* ----------------------------------------------------------------------
|
|
output a scalar value to screen
|
|
array = which array is being summed over
|
|
---------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::debug_scalar(int array, const char *label)
|
|
{
|
|
double sum = 0.0;
|
|
|
|
if (array == GRIDBRICK_OUT) {
|
|
if (flag3d) {
|
|
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
|
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
|
for (int ix = nxlo_out; ix <= nxhi_out; ix++)
|
|
sum += grid_brick[iz][iy][ix];
|
|
} else {
|
|
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
|
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
|
for (int ix = nxlo_out; ix <= nxhi_out; ix++) {
|
|
sum += cgrid_brick[iz][iy][ix][0];
|
|
sum += cgrid_brick[iz][iy][ix][1];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (array == GRIDBRICK_IN) {
|
|
if (flag3d) {
|
|
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (int ix = nxlo_in; ix <= nxhi_in; ix++)
|
|
sum += grid_brick[iz][iy][ix];
|
|
} else {
|
|
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
sum += cgrid_brick[iz][iy][ix][0];
|
|
sum += cgrid_brick[iz][iy][ix][1];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (array == FFT) {
|
|
if (flag3d) {
|
|
for (int i = 0; i < nfft_owned; i++)
|
|
sum += grid_fft[i];
|
|
} else {
|
|
for (int i = 0; i < 2*nfft_owned; i++)
|
|
sum += cfft[i];
|
|
}
|
|
}
|
|
|
|
if (array == CFFT1) {
|
|
for (int i = 0; i < 2*nfft_owned; i++)
|
|
sum += cfft[i];
|
|
}
|
|
|
|
if (array == CFFT2) {
|
|
for (int i = 0; i < 2*nbrick_owned; i++)
|
|
sum += cfft[i];
|
|
}
|
|
|
|
/*
|
|
double sumall;
|
|
MPI_Allreduce(&sum,&sumall,1,MPI_DOUBLE,MPI_SUM,world);
|
|
if (comm->me == 0) printf("%s: %s: %12.8g\n",labels[which],label,sumall);
|
|
*/
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
dump grid values to a file
|
|
array = which array is being output
|
|
---------------------------------------------------------------------- */
|
|
|
|
void AmoebaConvolution::debug_file(int array, const char *label)
|
|
{
|
|
FILE *fp;
|
|
|
|
int me = comm->me;
|
|
int nprocs = comm->nprocs;
|
|
|
|
// open file
|
|
|
|
if (me == 0) fp = fopen(fmt::format("tmp.{}.{}", labels[which], label).c_str(), "w");
|
|
|
|
// file header
|
|
// ncol = # of columns, including grid cell ID
|
|
|
|
bigint ntot = nx * ny * nz;
|
|
|
|
int ncol;
|
|
char *columns;
|
|
|
|
if (array == CFFT1 || array == CFFT2 || !flag3d) {
|
|
ncol = 3;
|
|
columns = (char *) "id real imag";
|
|
} else {
|
|
ncol = 2;
|
|
columns = (char *) "id value";
|
|
}
|
|
|
|
char boundstr[9]; // encoding of boundary flags
|
|
domain->boundary_string(boundstr);
|
|
|
|
if (me == 0) {
|
|
fprintf(fp,"ITEM: TIMESTEP\n");
|
|
fprintf(fp,BIGINT_FORMAT "\n",update->ntimestep);
|
|
fprintf(fp,"ITEM: NUMBER OF ATOMS\n");
|
|
fprintf(fp,BIGINT_FORMAT "\n",ntot);
|
|
fprintf(fp,"ITEM: BOX BOUNDS %s\n",boundstr);
|
|
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[0],domain->boxhi[0]);
|
|
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[1],domain->boxhi[1]);
|
|
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[2],domain->boxhi[2]);
|
|
fprintf(fp,"ITEM: ATOMS %s\n",columns);
|
|
}
|
|
|
|
// pack my values
|
|
// ngrid = # of grid cells I own
|
|
|
|
int ngrid;
|
|
if (array == GRIDBRICK_IN) ngrid = nbrick_owned;
|
|
else if (array == FFT) ngrid = nfft_owned;
|
|
else if (array == CFFT1) ngrid = nfft_owned;
|
|
else if (array == CFFT2) ngrid = nbrick_owned;
|
|
|
|
int ngridmax;
|
|
MPI_Allreduce(&ngrid,&ngridmax,1,MPI_INT,MPI_MAX,world);
|
|
|
|
double *buf,*buf2;
|
|
memory->create(buf,ncol*ngridmax,"amoeba:buf");
|
|
memory->create(buf2,ncol*ngridmax,"amoeba:buf2");
|
|
|
|
ngrid = 0;
|
|
|
|
if (array == GRIDBRICK_IN) {
|
|
if (flag3d) {
|
|
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = grid_brick[iz][iy][ix];
|
|
ngrid++;
|
|
}
|
|
} else {
|
|
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = cgrid_brick[iz][iy][ix][0];
|
|
buf[ncol*ngrid+2] = cgrid_brick[iz][iy][ix][1];
|
|
ngrid++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (array == FFT) {
|
|
if (flag3d) {
|
|
int m = 0;
|
|
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
|
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
|
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = grid_fft[m++];
|
|
ngrid++;
|
|
}
|
|
} else {
|
|
int m = 0;
|
|
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
|
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
|
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = cfft[m++];
|
|
buf[ncol*ngrid+2] = cfft[m++];
|
|
ngrid++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (array == CFFT1) {
|
|
int m = 0;
|
|
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
|
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
|
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = cfft[m++];
|
|
buf[ncol*ngrid+2] = cfft[m++];
|
|
ngrid++;
|
|
}
|
|
}
|
|
|
|
if (array == CFFT2) {
|
|
int m = 0;
|
|
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
|
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
|
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
|
int id = iz*ny*nx + iy*nx + ix + 1;
|
|
buf[ncol*ngrid] = id;
|
|
buf[ncol*ngrid+1] = cfft[m++];
|
|
buf[ncol*ngrid+2] = cfft[m++];
|
|
ngrid++;
|
|
}
|
|
}
|
|
|
|
// proc 0 outputs values
|
|
// pings other procs, send/recv of their values
|
|
|
|
int tmp,nlines;
|
|
MPI_Request request;
|
|
MPI_Status status;
|
|
|
|
if (me == 0) {
|
|
for (int iproc = 0; iproc < nprocs; iproc++) {
|
|
if (iproc) {
|
|
MPI_Irecv(buf,ngridmax*ncol,MPI_DOUBLE,iproc,0,world,&request);
|
|
MPI_Send(&tmp,0,MPI_INT,me+iproc,0,world);
|
|
MPI_Wait(&request,&status);
|
|
MPI_Get_count(&status,MPI_DOUBLE,&nlines);
|
|
nlines /= ncol;
|
|
} else nlines = ngrid;
|
|
|
|
int n = 0;
|
|
for (int m = 0; m < nlines; m++) {
|
|
if (ncol == 2)
|
|
fprintf(fp,"%d %12.8g\n",(int) buf[n],buf[n+1]);
|
|
else if (ncol == 3)
|
|
fprintf(fp,"%d %12.8g %12.8g\n",(int ) buf[n],buf[n+1],buf[n+2]);
|
|
n += ncol;
|
|
}
|
|
}
|
|
|
|
} else {
|
|
MPI_Recv(&tmp,0,MPI_INT,0,0,world,MPI_STATUS_IGNORE);
|
|
MPI_Rsend(buf,ngrid*ncol,MPI_DOUBLE,0,0,world);
|
|
}
|
|
|
|
// close file
|
|
|
|
if (me == 0) fclose(fp);
|
|
|
|
// clean up
|
|
|
|
memory->destroy(buf);
|
|
memory->destroy(buf2);
|
|
}
|
|
#endif
|