diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 00dfb2a583..0705a91e64 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -594,8 +594,24 @@ void MSM::allocate() memory->create2d_offset(phi1d,3,-order,order,"msm:phi1d"); memory->create2d_offset(dphi1d,3,-order,order,"msm:dphi1d"); - // commgrid using all processors for finest grid level + // one Grid3d using all processors for finest grid level + gcall = new Grid3d(lmp,world,nx_msm[0],ny_msm[0],nz_msm[0]); + gcall->set_distance(0.5*neighbor->skin); + gcall->set_stencil_atom(-nlower,nupper); + + gcall->setup_grid(nxlo_in[0],nxhi_in[0],nylo_in[0], + nyhi_in[0],nzlo_in[0],nzhi_in[0], + nxlo_out_all,nxhi_out_all,nylo_out_all, + nyhi_out_all,nzlo_out_all,nzhi_out_all); + + gcall->set_larger_grid(nxlo_out[0],nxhi_out[0],nylo_out[0], + nyhi_out[0],nzlo_out[0],nzhi_out[0]); + + // NOTE: or is it out[0] ?? + // NOTE: worry about flag = 1 here, 2 later + + /* gcall = new Grid3d(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0], nxlo_in[0],nxhi_in[0],nylo_in[0], nyhi_in[0],nzlo_in[0],nzhi_in[0], @@ -603,7 +619,8 @@ void MSM::allocate() nyhi_out_all,nzlo_out_all,nzhi_out_all, nxlo_out[0],nxhi_out[0],nylo_out[0], nyhi_out[0],nzlo_out[0],nzhi_out[0]); - + */ + gcall->setup_comm(ngcall_buf1,ngcall_buf2); npergrid = 1; memory->destroy(gcall_buf1); @@ -613,7 +630,7 @@ void MSM::allocate() // allocate memory for each grid level - for (int n=0; ndestroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]); memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n], nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid"); @@ -626,15 +643,29 @@ void MSM::allocate() if (active_flag[n]) { delete gc[n]; - int **procneigh = procneigh_levels[n]; + // NOTE: why is n = 0 same as all for grid size ? + + gc[n] = new Grid3d(lmp,world_levels[n],nx_msm[n],ny_msm[n],nz_msm[n]); + gc[n]->set_stencil_atom(-nlower,nupper); + + gc[n]->setup_grid(nxlo_in[n],nxhi_in[n],nylo_in[n], + nyhi_in[n],nzlo_in[n],nzhi_in[n], + nxlo_out[n],nxhi_out[n],nylo_out[n], + nyhi_out[n],nzlo_out[n],nzhi_out[n]); + + int **procneigh = procneigh_levels[n]; + gc[n]->set_proc_neighs(procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + /* gc[n] = new Grid3d(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n], nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n], nzlo_in[n],nzhi_in[n], nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n], - nzlo_out[n],nzhi_out[n], procneigh[0][0],procneigh[0][1],procneigh[1][0], procneigh[1][1],procneigh[2][0],procneigh[2][1]); + */ gc[n]->setup_comm(ngc_buf1[n],ngc_buf2[n]); npergrid = 1; @@ -642,6 +673,7 @@ void MSM::allocate() memory->destroy(gc_buf2[n]); memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1"); memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2"); + } else { delete gc[n]; memory->destroy(gc_buf1[n]); @@ -1160,9 +1192,10 @@ void MSM::set_grid_local() { // loop over grid levels - for (int n=0; ndestroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]); memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]); @@ -1170,9 +1203,9 @@ void MSM::set_grid_local() // n xyz lo/hi in[] = lower/upper bounds of global grid this proc owns // indices range from 0 to N-1 inclusive in each dim - comm->partition_grid(nx_msm[n],ny_msm[n],nz_msm[n],0.0, - nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n], - nzlo_in[n],nzhi_in[n]); + //comm->partition_grid(nx_msm[n],ny_msm[n],nz_msm[n],0.0, + // nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n], + // nzlo_in[n],nzhi_in[n]); nlower = -(order-1)/2; nupper = order/2; @@ -1195,11 +1228,9 @@ void MSM::set_grid_local() double yprd = prd[1]; double zprd = prd[2]; - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of // global MSM grid that my particles can contribute charge to + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 // effectively nlo_in,nhi_in + ghost cells // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest // position a particle in my box can be at @@ -1223,7 +1254,9 @@ void MSM::set_grid_local() nxlo_out_all = nlo + nlower; nxhi_out_all = nhi + nupper; } + // a larger ghost region is needed for the direct sum and for restriction/prolongation + nxlo_out[n] = nlo + MIN(-order,nxlo_direct); nxhi_out[n] = nhi + MAX(order,nxhi_direct); @@ -1246,8 +1279,10 @@ void MSM::set_grid_local() nzlo_out_all = nlo + nlower; nzhi_out_all = nhi + nupper; } - // a hemisphere is used for direct sum interactions, - // so no ghosting is needed for direct sum in the -z direction + + // hemisphere is used for direct sum interactions + // so no ghosting is needed for direct sum in the -z direction + nzlo_out[n] = nlo - order; nzhi_out[n] = nhi + MAX(order,nzhi_direct); @@ -1255,7 +1290,6 @@ void MSM::set_grid_local() // skip reset of lo/hi for procs who do not own any grid cells if (domain->nonperiodic) { - if (!domain->xperiodic && nxlo_in[n] <= nxhi_in[n]) { if (nxlo_in[n] == 0) nxlo_in[n] = alpha[n]; nxlo_out[n] = MAX(nxlo_out[n],alpha[n]); diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 2ad3b15c72..6c29a817e0 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -308,7 +308,7 @@ void PPPM::init() gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); gc->set_distance(0.5*neighbor->skin + qdist); gc->set_stencil_atom(-nlower,nupper); - gc->set_shift_atom(shiftatom); + gc->set_shift_atom(shiftatom_lo,shiftatom_hi); gc->set_zfactor(slab_volfactor); gc->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, @@ -336,6 +336,18 @@ void PPPM::init() double estimated_accuracy = final_accuracy(); + // allocate K-space dependent memory + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); + // print stats int ngrid_max,nfft_both_max; @@ -355,18 +367,6 @@ void PPPM::init() ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); } - - // allocate K-space dependent memory - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); } /* ---------------------------------------------------------------------- @@ -751,7 +751,7 @@ void PPPM::allocate() gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); gc->set_distance(0.5*neighbor->skin + qdist); gc->set_stencil_atom(-nlower,nupper); - gc->set_shift_atom(shiftatom); + gc->set_shift_atom(shiftatom_lo,shiftatom_hi); gc->set_zfactor(slab_volfactor); gc->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, @@ -765,6 +765,24 @@ void PPPM::allocate() memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1"); memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2"); + // tally local grid sizes + // ngrid = count of owned+ghost grid cells on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // same as count of owned grid cells + // nfft = FFT points in x-pencil FFT decomposition on this proc + // nfft_both = greater of nfft and nfft_brick + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + + nfft_both = MAX(nfft,nfft_brick); + // allocate distributed grid data memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, @@ -1324,77 +1342,46 @@ double PPPM::final_accuracy() /* ---------------------------------------------------------------------- set params which determine which owned and ghost cells this proc owns - Grid3d will use these params to partition grid + Grid3d uses these params to partition grid also partition FFT grid n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) ------------------------------------------------------------------------- */ void PPPM::set_grid_local() { - // shift values for particle <-> grid mapping + // shift values for particle <-> grid mapping depend on stencil order // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shiftatom = 0.5; - else shiftatom = 0.0; - shift = OFFSET + shiftatom; + // used in particle_map() and make_rho() and fieldforce() + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; if (order % 2) shiftone = 0.0; else shiftone = 0.5; - // nlower,nupper = stencil size for mapping particles to PPPM grid + // nlower/nupper = stencil size for mapping particles to grid nlower = -(order-1)/2; nupper = order/2; + // shiftatom lo/hi are passed to Grid3d to determine ghost cell extents + // shiftatom_lo = min shift on lo side + // shiftatom_hi = max shift on hi side + // for PPPMStagger, stagger value (0.0 or 0.5) also affects this - - - // NOTE: still to do: stagger and zperiod effects - - /* - - // extent of zprd when 2d slab mode is selected - - double zprd_slab = zprd*slab_volfactor; - - // for slab PPPM, assign z grid as if it were not extended - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - - if (stagger_flag) { - nxhi_out++; - nyhi_out++; - nzhi_out++; + if ((order % 2) && !stagger_flag) { + shiftatom_lo = 0.5; + shiftatom_hi = 0.5; + } else if ((order % 2) && stagger_flag) { + shiftatom_lo = 0.5; + shiftatom_hi = 0.5 + 0.5; + } else if ((order % 2 == 0) && !stagger_flag) { + shiftatom_lo = 0.0; + shiftatom_hi = 0.0; + } else if ((order % 2 == 0) && stagger_flag) { + shiftatom_lo = 0.0; + shiftatom_hi = 0.0 + 0.5; } - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - // also insure no other procs use ghost cells beyond +z limit - // differnet logic for non-tiled vs tiled decomposition - - if (slabflag == 1) { - if (comm->layout != Comm::LAYOUT_TILED) { - if (comm->myloc[2] == comm->procgrid[2]-1) nzhi_in = nzhi_out = nz_pppm - 1; - } else { - if (comm->mysplit[2][1] == 1.0) nzhi_in = nzhi_out = nz_pppm - 1; - } - nzhi_out = MIN(nzhi_out,nz_pppm-1); - } - - */ - - - // x-pencil decomposition of FFT mesh // global indices range from 0 to N-1 @@ -1421,26 +1408,6 @@ void PPPM::set_grid_local() nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; nzlo_fft = me_z*nz_pppm/npez_fft; nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // nfft = FFT points in x-pencil FFT decomposition on this proc - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - - - - // ngrid = count of PPPM grid pts owned by this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // count of FFT grids pts owned by this proc, without ghosts - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); } /* ---------------------------------------------------------------------- diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index 6166961cba..5d8fa76ec7 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -48,7 +48,7 @@ class PPPM : public KSpace { double volume; double delxinv, delyinv, delzinv, delvolinv; double h_x, h_y, h_z; - double shift, shiftone, shiftatom; + double shift, shiftone, shiftatom_lo, shiftatom_hi; int peratom_allocate_flag; int nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in; @@ -56,7 +56,7 @@ class PPPM : public KSpace { int nxlo_ghost, nxhi_ghost, nylo_ghost, nyhi_ghost, nzlo_ghost, nzhi_ghost; int nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft; int nlower, nupper; - int ngrid, nfft, nfft_both; + int ngrid, nfft_brick, nfft, nfft_both; FFT_SCALAR ***density_brick; FFT_SCALAR ***vdx_brick, ***vdy_brick, ***vdz_brick; diff --git a/src/KSPACE/pppm_dipole.cpp b/src/KSPACE/pppm_dipole.cpp index a2abc98961..04c358841f 100644 --- a/src/KSPACE/pppm_dipole.cpp +++ b/src/KSPACE/pppm_dipole.cpp @@ -205,7 +205,7 @@ void PPPMDipole::init() gc_dipole = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); gc_dipole->set_distance(0.5*neighbor->skin + qdist); gc_dipole->set_stencil_atom(-nlower,nupper); - gc_dipole->set_shift_atom(shiftatom); + gc_dipole->set_shift_atom(shiftatom_lo,shiftatom_hi); gc_dipole->set_zfactor(slab_volfactor); gc_dipole->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, @@ -541,7 +541,7 @@ void PPPMDipole::allocate() gc_dipole = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); gc_dipole->set_distance(0.5*neighbor->skin + qdist); gc_dipole->set_stencil_atom(-nlower,nupper); - gc_dipole->set_shift_atom(shiftatom); + gc_dipole->set_shift_atom(shiftatom_lo,shiftatom_hi); gc_dipole->set_zfactor(slab_volfactor); gc_dipole->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, diff --git a/src/KSPACE/pppm_dipole_spin.cpp b/src/KSPACE/pppm_dipole_spin.cpp index 4352ac03ae..912acdec15 100644 --- a/src/KSPACE/pppm_dipole_spin.cpp +++ b/src/KSPACE/pppm_dipole_spin.cpp @@ -190,7 +190,7 @@ void PPPMDipoleSpin::init() gc_dipole = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); gc_dipole->set_distance(0.5*neighbor->skin + qdist); gc_dipole->set_stencil_atom(-nlower,nupper); - gc_dipole->set_shift_atom(shiftatom); + gc_dipole->set_shift_atom(shiftatom_lo,shiftatom_hi); gc_dipole->set_zfactor(slab_volfactor); gc_dipole->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 336e7ca1c1..61b50f5364 100644 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -413,7 +413,7 @@ void PPPMDisp::init() int iteration = 0; if (function[0]) { - Grid3d *gctmp = nullptr; + gc = nullptr; while (order >= minorder) { if (iteration && me == 0) @@ -423,41 +423,40 @@ void PPPMDisp::init() // set grid for dispersion interaction and coulomb interactions - set_grid(); + set_grid_global(); if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMDisp Coulomb grid is too large"); + error->all(FLERR,"PPPMDisp Coulomb grid is too large"); - set_fft_parameters(nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nylo_fft,nzlo_fft, - nxhi_fft,nyhi_fft,nzhi_fft, - nxlo_in,nylo_in,nzlo_in, - nxhi_in,nyhi_in,nzhi_in, - nxlo_out,nylo_out,nzlo_out, - nxhi_out,nyhi_out,nzhi_out, - nlower,nupper, - ngrid,nfft,nfft_both, - shift,shiftone,order); + set_grid_local(order,nx_pppm,ny_pppm,nz_pppm, + shift,shiftone,shiftatom,nlower,nupper, + nxlo_fft,nylo_fft,nzlo_fft, + nxhi_fft,nyhi_fft,nzhi_fft); if (overlap_allowed) break; - gctmp = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); + gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); + gc->set_distance(0.5*neighbor->skin + qdist); + gc->set_stencil_atom(-nlower,nupper); + gc->set_shift_atom(shiftatom,shiftatom); + gc->set_zfactor(slab_volfactor); + + gc->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); int tmp1,tmp2; - gctmp->setup_comm(tmp1,tmp2); - if (gctmp->ghost_adjacent()) break; - delete gctmp; + gc->setup_comm(tmp1,tmp2); + if (gc->ghost_adjacent()) break; + delete gc; order--; } if (order < minorder) error->all(FLERR,"Coulomb PPPMDisp order has been reduced below minorder"); - if (!overlap_allowed && !gctmp->ghost_adjacent()) + if (!overlap_allowed && !gc->ghost_adjacent()) error->all(FLERR,"PPPMDisp grid stencil extends beyond nearest neighbor processor"); - if (gctmp) delete gctmp; + if (gc) delete gc; // adjust g_ewald @@ -493,7 +492,7 @@ void PPPMDisp::init() iteration = 0; if (function[1] + function[2] + function[3]) { - Grid3d *gctmp = nullptr; + gc6 = nullptr; while (order_6 >= minorder) { if (iteration && me == 0) @@ -501,34 +500,31 @@ void PPPMDisp::init() "b/c stencil extends beyond neighbor processor"); iteration++; - set_grid_6(); + set_grid_global_6(); if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) - error->all(FLERR,"PPPMDisp Dispersion grid is too large"); + error->all(FLERR,"PPPMDisp Dispersion grid is too large"); - set_fft_parameters(nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nylo_fft_6,nzlo_fft_6, - nxhi_fft_6,nyhi_fft_6,nzhi_fft_6, - nxlo_in_6,nylo_in_6,nzlo_in_6, - nxhi_in_6,nyhi_in_6,nzhi_in_6, - nxlo_out_6,nylo_out_6,nzlo_out_6, - nxhi_out_6,nyhi_out_6,nzhi_out_6, - nlower_6,nupper_6, - ngrid_6,nfft_6,nfft_both_6, - shift_6,shiftone_6,order_6); + set_grid_local(order_6,nx_pppm_6,ny_pppm_6,nz_pppm_6, + shift_6,shiftone_6,shiftatom_6,nlower_6,nupper_6, + nxlo_fft_6,nylo_fft_6,nzlo_fft_6, + nxhi_fft_6,nyhi_fft_6,nzhi_fft_6); if (overlap_allowed) break; - gctmp = new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, - nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, - nzlo_out_6,nzhi_out_6); + gc6 = new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6); + gc6->set_distance(0.5*neighbor->skin + qdist); + gc6->set_stencil_atom(-nlower_6,nupper_6); + gc6->set_shift_atom(shiftatom_6,shiftatom_6); + gc6->set_zfactor(slab_volfactor); + + gc->setup_grid(nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); int tmp1,tmp2; - gctmp->setup_comm(tmp1,tmp2); - if (gctmp->ghost_adjacent()) break; - delete gctmp; + gc6->setup_comm(tmp1,tmp2); + if (gc6->ghost_adjacent()) break; + delete gc6; order_6--; } @@ -536,14 +532,13 @@ void PPPMDisp::init() if (order_6 < minorder) error->all(FLERR,"Dispersion PPPMDisp order has been " "reduced below minorder"); - if (!overlap_allowed && !gctmp->ghost_adjacent()) + if (!overlap_allowed && !gc6->ghost_adjacent()) error->all(FLERR,"Dispersion PPPMDisp grid stencil extends beyond nearest neighbor proc"); - if (gctmp) delete gctmp; + if (gc6) delete gc6; // adjust g_ewald_6 - if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) - adjust_gewald_6(); + if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) adjust_gewald_6(); // calculate the final accuracy @@ -804,28 +799,16 @@ void PPPMDisp::reset_grid() // reset portion of global grid that each proc owns if (function[0]) - set_fft_parameters(nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nylo_fft,nzlo_fft, - nxhi_fft,nyhi_fft,nzhi_fft, - nxlo_in,nylo_in,nzlo_in, - nxhi_in,nyhi_in,nzhi_in, - nxlo_out,nylo_out,nzlo_out, - nxhi_out,nyhi_out,nzhi_out, - nlower,nupper, - ngrid,nfft,nfft_both, - shift,shiftone,order); + set_grid_local(order,nx_pppm,ny_pppm,nz_pppm, + shift,shiftone,shiftatom,nlower,nupper, + nxlo_fft,nylo_fft,nzlo_fft, + nxhi_fft,nyhi_fft,nzhi_fft); if (function[1] + function[2] + function[3]) - set_fft_parameters(nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nylo_fft_6,nzlo_fft_6, - nxhi_fft_6,nyhi_fft_6,nzhi_fft_6, - nxlo_in_6,nylo_in_6,nzlo_in_6, - nxhi_in_6,nyhi_in_6,nzhi_in_6, - nxlo_out_6,nylo_out_6,nzlo_out_6, - nxhi_out_6,nyhi_out_6,nzhi_out_6, - nlower_6,nupper_6, - ngrid_6,nfft_6,nfft_both_6, - shift_6,shiftone_6,order_6); + set_grid_local(order_6,nx_pppm_6,ny_pppm_6,nz_pppm_6, + shift_6,shiftone_6,shiftatom_6,nlower_6,nupper_6, + nxlo_fft_6,nylo_fft_6,nzlo_fft_6, + nxhi_fft_6,nyhi_fft_6,nzhi_fft_6); // reallocate K-space dependent memory // check if grid communication is now overlapping if not allowed @@ -1685,7 +1668,53 @@ int PPPMDisp::check_convergence(double** A, double** Q, double** A0, void _noopt PPPMDisp::allocate() { + // -------------------------------------- + // Coulomb grids + // -------------------------------------- + if (function[0]) { + + // create ghost grid object for rho and electric field communication + // returns local owned and ghost grid bounds + // setup communication patterns and buffers + + gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm); + gc->set_distance(0.5*neighbor->skin + qdist); + gc->set_stencil_atom(-nlower,nupper); + gc->set_shift_atom(shiftatom,shiftatom); + gc->set_zfactor(slab_volfactor); + + gc->setup_grid(nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); + + gc->setup_comm(ngc_buf1,ngc_buf2); + + if (differentiation_flag) npergrid = 1; + else npergrid = 3; + + memory->create(gc_buf1,npergrid*ngc_buf1,"pppm/disp:gc_buf1"); + memory->create(gc_buf2,npergrid*ngc_buf2,"pppm/disp:gc_buf2"); + + // tally local grid sizes + // ngrid = count of owned+ghost grid cells on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // same as count of owned grid cells + // nfft = FFT points in x-pencil FFT decomposition on this proc + // nfft_both = greater of nfft and nfft_brick + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + + nfft_both = MAX(nfft,nfft_brick); + + // allocate distributed grid data + memory->create(work1,2*nfft_both,"pppm/disp:work1"); memory->create(work2,2*nfft_both,"pppm/disp:work2"); @@ -1710,6 +1739,8 @@ void _noopt PPPMDisp::allocate() memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, nxlo_out,nxhi_out,"pppm/disp:density_brick"); + memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); + if (differentiation_flag == 1) { memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, nxlo_out,nxhi_out,"pppm/disp:u_brick"); @@ -1728,7 +1759,11 @@ void _noopt PPPMDisp::allocate() memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); } - memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decomposition + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition int tmp; @@ -1746,23 +1781,91 @@ void _noopt PPPMDisp::allocate() nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, 1,0,0,FFT_PRECISION,collective_flag); - - // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - - gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); - - gc->setup_comm(ngc_buf1,ngc_buf2); - - if (differentiation_flag) npergrid = 1; - else npergrid = 3; - - memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1"); - memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2"); } + // -------------------------------------- + // allocations common to all dispersion options + // -------------------------------------- + + if (function[1] + function[2] + function[3]) { + + // create ghost grid object for dispersion communication + // returns local owned and ghost grid bounds + // setup communication patterns and buffers + + gc6 = new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6); + gc6->set_distance(0.5*neighbor->skin + qdist); + gc6->set_stencil_atom(-nlower_6,nupper_6); + gc6->set_shift_atom(shiftatom_6,shiftatom_6); + gc6->set_zfactor(slab_volfactor); + + gc6->setup_grid(nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); + + gc6->setup_comm(ngc6_buf1,ngc6_buf2); + + if (function[1]) { + if (differentiation_flag) npergrid6 = 1; + else npergrid6 = 3; + } else if (function[2]) { + if (differentiation_flag) npergrid6 = 7; + else npergrid6 = 21; + } else if (function[3]) { + if (differentiation_flag) npergrid6 = 1*nsplit_alloc; + else npergrid6 = 3*nsplit_alloc; + } + + memory->create(gc6_buf1,npergrid6*ngc6_buf1,"pppm:gc_buf1"); + memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc_buf2"); + + // tally local grid sizes + // ngrid = count of owned+ghost grid cells on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // same as count of owned grid cells + // nfft = FFT points in x-pencil FFT decomposition on this proc + // nfft_both = greater of nfft and nfft_brick + + ngrid_6 = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * + (nzhi_out_6-nzlo_out_6+1); + + nfft_brick_6 = (nxhi_in_6-nxlo_in_6+1) * (nyhi_in_6-nylo_in_6+1) * + (nzhi_in_6-nzlo_in_6+1); + + nfft_6 = (nxhi_fft_6-nxlo_fft_6+1) * (nyhi_fft_6-nylo_fft_6+1) * + (nzhi_fft_6-nzlo_fft_6+1); + + nfft_both_6 = MAX(nfft_6,nfft_brick_6); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decomposition + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1_6 = + new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp,collective_flag); + + fft2_6 = + new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp,collective_flag); + + remap_6 = + new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION,collective_flag); + } + + // -------------------------------------- + // dispersion grids with geometric mixing + // -------------------------------------- + if (function[1]) { memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); @@ -1789,6 +1892,8 @@ void _noopt PPPMDisp::allocate() memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); + memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); + if (differentiation_flag == 1) { memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); @@ -1808,45 +1913,12 @@ void _noopt PPPMDisp::allocate() memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); } - memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); - - int tmp; - - fft1_6 = - new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp,collective_flag); - - fft2_6 = - new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp,collective_flag); - - remap_6 = - new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION,collective_flag); - - // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - - gc6 = - new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); - - gc6->setup_comm(ngc6_buf1,ngc6_buf2); - - if (differentiation_flag) npergrid6 = 1; - else npergrid6 = 3; - - memory->create(gc6_buf1,npergrid6*ngc6_buf1,"pppm:gc_buf1"); - memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc_buf2"); } + // -------------------------------------- + // dispersion grids with arithmetic mixing + // -------------------------------------- + if (function[2]) { memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); @@ -1975,41 +2047,12 @@ void _noopt PPPMDisp::allocate() memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); } - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp,collective_flag); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp,collective_flag); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION,collective_flag); - - // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - - gc6 = - new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); - - gc6->setup_comm(ngc6_buf1,ngc6_buf2); - - if (differentiation_flag) npergrid6 = 7; - else npergrid6 = 21; - - memory->create(gc6_buf1,npergrid6*ngc6_buf1,"pppm:gc_buf1"); - memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc_buf2"); } + // -------------------------------------- + // dispersion grids with no mixing + // -------------------------------------- + if (function[3]) { memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); @@ -2037,6 +2080,9 @@ void _noopt PPPMDisp::allocate() memory->create4d_offset(density_brick_none,nsplit_alloc, nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); + memory->create(density_fft_none,nsplit_alloc,nfft_both_6, + "pppm/disp:density_fft_none"); + if (differentiation_flag == 1) { memory->create4d_offset(u_brick_none,nsplit_alloc, nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, @@ -2060,41 +2106,6 @@ void _noopt PPPMDisp::allocate() nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); } - memory->create(density_fft_none,nsplit_alloc,nfft_both_6, - "pppm/disp:density_fft_none"); - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp,collective_flag); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp,collective_flag); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION,collective_flag); - - // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - - gc6 = - new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); - - gc6->setup_comm(ngc6_buf1,ngc6_buf2); - - if (differentiation_flag) npergrid6 = 1*nsplit_alloc; - else npergrid6 = 3*nsplit_alloc; - - memory->create(gc6_buf1,npergrid6*ngc6_buf1,"pppm:gc6_buf1"); - memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc6_buf2"); } } @@ -2107,6 +2118,10 @@ void PPPMDisp::allocate_peratom() { peratom_allocate_flag = 1; + // -------------------------------------- + // Coulomb grids + // -------------------------------------- + if (function[0]) { if (differentiation_flag != 1) memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, @@ -2136,6 +2151,10 @@ void PPPMDisp::allocate_peratom() memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2"); } + // -------------------------------------- + // dispersion grids with geometric mixing + // -------------------------------------- + if (function[1]) { if (differentiation_flag != 1 ) memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, @@ -2165,6 +2184,10 @@ void PPPMDisp::allocate_peratom() memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc6_buf2"); } + // -------------------------------------- + // dispersion grids with arithmetic mixing + // -------------------------------------- + if (function[2]) { if (differentiation_flag != 1) { memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, @@ -2286,6 +2309,10 @@ void PPPMDisp::allocate_peratom() memory->create(gc6_buf2,npergrid6*ngc6_buf2,"pppm:gc6_buf2"); } + // -------------------------------------- + // dispersion grids with no mixing + // -------------------------------------- + if (function[3]) { if (differentiation_flag != 1) memory->create4d_offset(u_brick_none,nsplit_alloc, @@ -2602,11 +2629,10 @@ void PPPMDisp::deallocate_peratom() } /* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald - for Coulomb interactions + set global grid and g_ewald for Coulomb interactions ------------------------------------------------------------------------- */ -void PPPMDisp::set_grid() +void PPPMDisp::set_grid_global() { double q2 = qsqsum * force->qqrd2e; @@ -2678,152 +2704,61 @@ void PPPMDisp::set_grid() } /* ---------------------------------------------------------------------- - set the FFT parameters + set params which determine which owned and ghost cells this proc owns + for Coulomb or dipsersion interactions + Grid3d uses these params to partition grids + also partition FFT grids + n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) ------------------------------------------------------------------------- */ -void PPPMDisp::set_fft_parameters(int& nx_p, int& ny_p, int& nz_p, - int& nxlo_f, int& nylo_f, int& nzlo_f, - int& nxhi_f, int& nyhi_f, int& nzhi_f, - int& nxlo_i, int& nylo_i, int& nzlo_i, - int& nxhi_i, int& nyhi_i, int& nzhi_i, - int& nxlo_o, int& nylo_o, int& nzlo_o, - int& nxhi_o, int& nyhi_o, int& nzhi_o, - int& nlow, int& nupp, - int& ng, int& nf, int& nfb, - double& sft, double& sftone, int& ord) +void PPPMDisp::set_grid_local(int order_either, + int nx_either, int ny_either, int nz_either, + double &shift_either, double &shiftone_either, + double &shiftatom_either, + int &nlower_either, int &nupper_either, + int &nxlo_fft_either, int &nylo_fft_either, int &nzlo_fft_either, + int &nxhi_fft_either, int &nyhi_fft_either, int &nzhi_fft_either) { - // partition global grid across procs - // n xyz lo/hi i = lower/upper bounds of global grid this proc owns - // indices range from 0 to N-1 inclusive in each dim - - comm->partition_grid(nx_p,ny_p,nz_p,slab_volfactor, - nxlo_i,nxhi_i,nylo_i,nyhi_i,nzlo_i,nzhi_i); - - // nlow,nupp = stencil size for mapping particles to PPPM grid - - nlow = -(ord-1)/2; - nupp = ord/2; - - // sft values for particle <-> grid mapping + // shift values for particle <-> grid mapping depend on stencil order // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + // used in particle_map() and make_rho() and fieldforce() + + if (order_either % 2) shift_either = OFFSET + 0.5; + else shift_either = OFFSET; + + if (order_either % 2) shiftone_either = 0.0; + else shiftone_either = 0.5; - if (ord % 2) sft = OFFSET + 0.5; - else sft = OFFSET; - if (ord % 2) sftone = 0.0; - else sftone = 0.5; + // nlower/nupper = stencil size for mapping particles to grid - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended + nlower_either = -(order_either-1)/2; + nupper_either = order_either/2; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nxlo_o = nlo + nlow; - nxhi_o = nhi + nupp; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nylo_o = nlo + nlow; - nyhi_o = nhi + nupp; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nzlo_o = nlo + nlow; - nzhi_o = nhi + nupp; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) - - if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_i = nz_p - 1; - nzhi_o = nz_p - 1; - } - - // decomposition of FFT mesh + // x-pencil decomposition of Coulomb FFT mesh // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions + // each proc owns entire x-dimension, clumps of columns in y,z dimensions // npey_fft,npez_fft = # of procs in y,z dims // if nprocs is small enough, proc can own 1 or more entire xy planes, // else proc owns 2d sub-blocks of yz plane // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own + // of the global FFT mesh that I own in x-pencil decomposition int npey_fft,npez_fft; - if (nz_p >= nprocs) { + if (nz_either >= nprocs) { npey_fft = 1; npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); + } else procs2grid2d(nprocs,ny_either,nz_either,&npey_fft,&npez_fft); int me_y = me % npey_fft; int me_z = me / npey_fft; - nxlo_f = 0; - nxhi_f = nx_p - 1; - nylo_f = me_y*ny_p/npey_fft; - nyhi_f = (me_y+1)*ny_p/npey_fft - 1; - nzlo_f = me_z*nz_p/npez_fft; - nzhi_f = (me_z+1)*nz_p/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * (nzhi_o-nzlo_o+1); - - // FFT arrays on this proc, without ghosts - // nf = nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfb = nfft_both = greater of 2 values - - nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * (nzhi_f-nzlo_f+1); - int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * (nzhi_i-nzlo_i+1); - nfb = MAX(nf,nfft_brick); + nxlo_fft_either = 0; + nxhi_fft_either = nx_either - 1; + nylo_fft_either = me_y*ny_either/npey_fft; + nyhi_fft_either = (me_y+1)*ny_either/npey_fft - 1; + nzlo_fft_either = me_z*nz_either/npez_fft; + nzhi_fft_either = (me_z+1)*nz_either/npez_fft - 1; } /* ---------------------------------------------------------------------- @@ -3365,17 +3300,15 @@ double PPPMDisp::compute_qopt_6_ad() } /* ---------------------------------------------------------------------- - set size of FFT grid and g_ewald_6 - for Dispersion interactions + set global grid and g_ewald_6 for dispersion interactions ------------------------------------------------------------------------- */ -void PPPMDisp::set_grid_6() +void PPPMDisp::set_grid_global_6() { - // calculate csum - if (!csumflag) calc_csum(); if (!gewaldflag_6) set_init_g6(); if (!gridflag_6) set_n_pppm_6(); + while (!factorable(nx_pppm_6)) nx_pppm_6++; while (!factorable(ny_pppm_6)) ny_pppm_6++; while (!factorable(nz_pppm_6)) nz_pppm_6++; diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h index 5bcf7e01b0..f23382c1e1 100644 --- a/src/KSPACE/pppm_disp.h +++ b/src/KSPACE/pppm_disp.h @@ -64,19 +64,19 @@ class PPPMDisp : public KSpace { double delxinv, delyinv, delzinv, delvolinv; double delxinv_6, delyinv_6, delzinv_6, delvolinv_6; - double shift, shiftone; + double shift, shiftone, shiftatom; int nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in; int nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out; int nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft; int nlower, nupper; - int ngrid, nfft, nfft_both; + int ngrid, nfft_brick, nfft, nfft_both; - double shift_6, shiftone_6; + double shift_6, shiftone_6, shiftatom_6; int nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6; int nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6; int nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6; int nlower_6, nupper_6; - int ngrid_6, nfft_6, nfft_both_6; + int ngrid_6, nfft_brick_6, nfft_6, nfft_both_6; // the following variables are needed for every structure factor @@ -149,7 +149,8 @@ class PPPMDisp : public KSpace { FFT_SCALAR ****v0_brick_none, ****v1_brick_none, ****v2_brick_none, ****v3_brick_none, ****v4_brick_none, ****v5_brick_none; - //// needed for each interaction type + // needed for each interaction type + double *greensfn; double **vg; double **vg2; @@ -190,7 +191,9 @@ class PPPMDisp : public KSpace { int triclinic; // domain settings, orthog or triclinic double *boxlo; + // TIP4P settings + int typeH, typeO; // atom types of TIP4P water H and O atoms double qdist; // distance from O site to negative charge double alpha; // geometric factor @@ -202,15 +205,16 @@ class PPPMDisp : public KSpace { void mmult(double **, double **, double **, int); int check_convergence(double **, double **, double **, double **, double **, double **, int); - void set_grid(); - void set_grid_6(); + void set_grid_global(); + void set_grid_global_6(); + void set_grid_local(int, int, int, int, double &, double &, double &, + int &, int &, int &, int &, int &, int &, int &, int &); void set_init_g6(); - void set_fft_parameters(int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, - int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, - int &, int &, int &, int &, int &, int &, double &, double &, int &); void set_n_pppm_6(); + void adjust_gewald(); void adjust_gewald_6(); + double f(); double derivf(); double f_6(); @@ -218,6 +222,7 @@ class PPPMDisp : public KSpace { double final_accuracy(); void final_accuracy_6(double &, double &, double &); double lj_rspace_error(); + double compute_qopt(); double compute_qopt_6(); double compute_qopt_ik(); @@ -231,6 +236,7 @@ class PPPMDisp : public KSpace { virtual void allocate_peratom(); virtual void deallocate(); virtual void deallocate_peratom(); + int factorable(int); double rms(double, double, bigint, double, double **); double diffpr(double, double, double, double, double **); diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp index b12f50f561..fb9b5ee4c1 100644 --- a/src/KSPACE/pppm_stagger.cpp +++ b/src/KSPACE/pppm_stagger.cpp @@ -145,7 +145,7 @@ void PPPMStagger::compute(int eflag, int vflag) nstagger = 2; stagger = 0.0; - for (int n=0; n 1.0 if the grid extends beyond the +z boundary by this factor - used by 2d slab-mode PPPM - this effectively maps proc sub-grids to a smaller subset of the grid - nxyz lo/hi = inclusive lo/hi bounds of global grid sub-brick I own - if proc owns no grid cells in a dim, then nlo > nhi - special case: 2 procs share boundary which a grid point is exactly on - 2 equality if tests insure a consistent decision as to which proc owns it -------------------------------------------------------------------------- */ - -void Comm::partition_grid(int nx, int ny, int nz, double zfactor, - int &nxlo, int &nxhi, int &nylo, int &nyhi, - int &nzlo, int &nzhi) -{ - double xfraclo,xfrachi,yfraclo,yfrachi,zfraclo,zfrachi; - - if (layout != LAYOUT_TILED) { - xfraclo = xsplit[myloc[0]]; - xfrachi = xsplit[myloc[0]+1]; - yfraclo = ysplit[myloc[1]]; - yfrachi = ysplit[myloc[1]+1]; - zfraclo = zsplit[myloc[2]]; - zfrachi = zsplit[myloc[2]+1]; - } else { - xfraclo = mysplit[0][0]; - xfrachi = mysplit[0][1]; - yfraclo = mysplit[1][0]; - yfrachi = mysplit[1][1]; - zfraclo = mysplit[2][0]; - zfrachi = mysplit[2][1]; - } - - nxlo = static_cast (xfraclo * nx); - if (1.0*nxlo != xfraclo*nx) nxlo++; - nxhi = static_cast (xfrachi * nx); - if (1.0*nxhi == xfrachi*nx) nxhi--; - - nylo = static_cast (yfraclo * ny); - if (1.0*nylo != yfraclo*ny) nylo++; - nyhi = static_cast (yfrachi * ny); - if (1.0*nyhi == yfrachi*ny) nyhi--; - - if (zfactor == 0.0) { - nzlo = static_cast (zfraclo * nz); - if (1.0*nzlo != zfraclo*nz) nzlo++; - nzhi = static_cast (zfrachi * nz); - if (1.0*nzhi == zfrachi*nz) nzhi--; - } else { - nzlo = static_cast (zfraclo * nz/zfactor); - if (1.0*nzlo != zfraclo*nz) nzlo++; - nzhi = static_cast (zfrachi * nz/zfactor); - if (1.0*nzhi == zfrachi*nz) nzhi--; - } -} - /* ---------------------------------------------------------------------- communicate inbuf around full ring of processors with messtag nbytes = size of inbuf = n datums * nper bytes diff --git a/src/comm.h b/src/comm.h index 3a422f32f1..9476e2e21e 100644 --- a/src/comm.h +++ b/src/comm.h @@ -106,10 +106,6 @@ class Comm : protected Pointers { virtual void coord2proc_setup() {} virtual int coord2proc(double *, int &, int &, int &); - // partition a global regular grid by proc sub-domains - - void partition_grid(int, int, int, double, int &, int &, int &, int &, int &, int &); - // memory usage virtual double memory_usage() = 0; diff --git a/src/grid2d.cpp b/src/grid2d.cpp index c2a4d3c75a..6a9385d30b 100644 --- a/src/grid2d.cpp +++ b/src/grid2d.cpp @@ -530,8 +530,8 @@ void Grid2d::ghost_grid() lo = static_cast((sublo[1]-dist[1]-boxlo[1]) * dyinv + shift_atom + OFFSET) - OFFSET; hi = static_cast((subhi[1]+dist[1]-boxlo[1]) * dyinv + shift_atom + OFFSET) - OFFSET; - outxlo = MIN(lo - stencil_atom_lo, inylo - stencil_grid_lo); - outxhi = MAX(hi + stencil_atom_hi, inyhi + stencil_grid_hi); + outylo = MIN(lo - stencil_atom_lo, inylo - stencil_grid_lo); + outyhi = MAX(hi + stencil_atom_hi, inyhi + stencil_grid_hi); // limit out xyz lo/hi indices to global grid for non-periodic dims @@ -1889,7 +1889,7 @@ int Grid2d::proc_index_uniform(int igrid, int n, int dim, double *split) double fraclo,frachi; // loop over # of procs in this dime - // compute the grid bounds for that proc, same as comm->partition_grid() + // compute the grid bounds for that proc // if igrid falls within those bounds, return m = proc index int m; diff --git a/src/grid3d.cpp b/src/grid3d.cpp index 19dc544b3b..6dd4bd0110 100644 --- a/src/grid3d.cpp +++ b/src/grid3d.cpp @@ -61,119 +61,9 @@ Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, int gnx, int gny, int gnz) : maxdist = 0.0; stencil_grid_lo = stencil_grid_hi = 0; stencil_atom_lo = stencil_atom_hi = 0; - shift_grid = shift_atom = 0.0; - zfactor = 1.0; -} - -/* ---------------------------------------------------------------------- - constructor called by PPPM classes - gcomm = world communicator - gnx, gny, gnz = size of global grid - i xyz lohi = portion of global grid this proc owns, 0 <= index < N - o xyz lohi = owned grid portion + ghost grid cells needed in all directions - if o indices are < 0 or hi indices are >= N, - then grid is treated as periodic in that dimension, - communication is done across the periodic boundaries -------------------------------------------------------------------------- */ - -Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, - int gnx, int gny, int gnz, - int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi) - : Pointers(lmp) -{ - // store commnicator and global grid size - - gridcomm = gcomm; - MPI_Comm_rank(gridcomm,&me); - MPI_Comm_size(gridcomm,&nprocs); - - nx = gnx; - ny = gny; - nz = gnz; - - // store info about Comm decomposition needed for remap operation - // two Grid instances will exist for duration of remap - // each must know Comm decomp at time Grid instance was created - - extract_comm_info(); - - // store grid bounds and proc neighs - - if (layout != Comm::LAYOUT_TILED) { - store(ixlo,ixhi,iylo,iyhi,izlo,izhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - procxlo,procxhi,procylo,procyhi,proczlo,proczhi); - } else { - store(ixlo,ixhi,iylo,iyhi,izlo,izhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - 0,0,0,0,0,0); - } -} - -/* ---------------------------------------------------------------------- - constructor called by MSM - gcomm = world communicator or sub-communicator for a hierarchical grid - flag = 1 if e xyz lohi values = larger grid stored by caller in gcomm = world - flag = 2 if e xyz lohi values = 6 neighbor procs in gcomm - gnx, gny, gnz = size of global grid - i xyz lohi = portion of global grid this proc owns, 0 <= index < N - o xyz lohi = owned grid portion + ghost grid cells needed in all directions - e xyz lohi for flag = 1: extent of larger grid stored by caller - e xyz lohi for flag = 2: 6 neighbor procs -------------------------------------------------------------------------- */ - -Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, int flag, - int gnx, int gny, int gnz, - int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi, - int exlo, int exhi, int eylo, int eyhi, int ezlo, int ezhi) - : Pointers(lmp) -{ - // store commnicator and global grid size - - gridcomm = gcomm; - MPI_Comm_rank(gridcomm,&me); - MPI_Comm_size(gridcomm,&nprocs); - - nx = gnx; - ny = gny; - nz = gnz; - - // store info about Comm decomposition needed for remap operation - // two Grid instances will exist for duration of remap - // each must know Comm decomp at time Grid instance was created - - extract_comm_info(); - - // store grid bounds and proc neighs - - if (flag == 1) { - if (layout != Comm::LAYOUT_TILED) { - // this assumes gcomm = world - store(ixlo,ixhi,iylo,iyhi,izlo,izhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - exlo,exhi,eylo,eyhi,ezlo,ezhi, - procxlo,procxhi,procylo,procyhi,proczlo,proczhi); - } else { - store(ixlo,ixhi,iylo,iyhi,izlo,izhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - exlo,exhi,eylo,eyhi,ezlo,ezhi, - 0,0,0,0,0,0); - } - - } else if (flag == 2) { - if (layout != Comm::LAYOUT_TILED) { - store(ixlo,ixhi,iylo,iyhi,izlo,izhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - oxlo,oxhi,oylo,oyhi,ozlo,ozhi, - exlo,exhi,eylo,eyhi,ezlo,ezhi); - } else { - error->all(FLERR,"Grid3d does not support tiled layout for MSM"); - } - } + shift_grid = 0.5; + shift_atom_lo = shift_atom_hi = 0.0; + zextra = 0; } /* ---------------------------------------------------------------------- */ @@ -219,54 +109,13 @@ Grid3d::~Grid3d() deallocate_remap(); } -/* ---------------------------------------------------------------------- - store grid bounds and proc neighs from caller in internal variables -------------------------------------------------------------------------- */ - -void Grid3d::store(int ixlo, int ixhi, int iylo, int iyhi, - int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, - int ozlo, int ozhi, - int fxlo, int fxhi, int fylo, int fyhi, - int fzlo, int fzhi, - int pxlo, int pxhi, int pylo, int pyhi, - int pzlo, int pzhi) -{ - inxlo = ixlo; - inxhi = ixhi; - inylo = iylo; - inyhi = iyhi; - inzlo = izlo; - inzhi = izhi; - - outxlo = oxlo; - outxhi = oxhi; - outylo = oylo; - outyhi = oyhi; - outzlo = ozlo; - outzhi = ozhi; - - fullxlo = fxlo; - fullxhi = fxhi; - fullylo = fylo; - fullyhi = fyhi; - fullzlo = fzlo; - fullzhi = fzhi; - - procxlo = pxlo; - procxhi = pxhi; - procylo = pylo; - procyhi = pyhi; - proczlo = pzlo; - proczhi = pzhi; -} - // ---------------------------------------------------------------------- // set Grid parameters // ---------------------------------------------------------------------- /* ---------------------------------------------------------------------- maxdist = max distance outside proc subdomain a particle can be + used to determine extent of ghost cells ------------------------------------------------------------------------- */ void Grid3d::set_distance(double distance) @@ -276,7 +125,7 @@ void Grid3d::set_distance(double distance) /* ---------------------------------------------------------------------- # of grid cells beyond an owned grid cell that caller accesses - e.g. for a finite different stencial + used by FixTTMGrid for a finite different stencil can be different in lo vs hi direction ------------------------------------------------------------------------- */ @@ -287,8 +136,8 @@ void Grid3d::set_stencil_grid(int lo, int hi) } /* ---------------------------------------------------------------------- - # of grid cells beyond a particle's grid cell that caller updates - e.g. for smearing a point charge to the grid + # of grid cells beyond a particle's grid cell that caller accesses + used by PPPM for smearing a point charge to the grid can be different in lo vs hi direction ------------------------------------------------------------------------- */ @@ -299,9 +148,9 @@ void Grid3d::set_stencil_atom(int lo, int hi) } /* ---------------------------------------------------------------------- - shift_grid = offset of position of grid point within grid cell - 0.5 = cell center, 0.0 = lower-left corner of cell - used to determine which proc owns the grid cell within its subdomain + shift_grid = offset within grid cell of position of grid point + 0.5 = cell center (default), 0.0 = lower-left corner of cell + used by Grid to decide which proc owns a grid cell (grid pt within subdomain) ------------------------------------------------------------------------- */ void Grid3d::set_shift_grid(double shift) @@ -310,28 +159,72 @@ void Grid3d::set_shift_grid(double shift) } /* ---------------------------------------------------------------------- - shift_atom = offset of atoms when caller maps them to grid cells + shift_atom = offset added to atoms when caller maps them to grid cells 0.5 = half a grid cell, 0.0 = no offset - used when computing possible ghost extent - PPPM uses 0.5 when order is odd, 0.0 when order is even + used by Grid to compute maximum possible ghost extents + use of lo/hi allows max ghost extent on each side to be different + PPPM uses 0.5 when stencil order is odd, 0.0 when order is even + PPPM/stagger applies different shift values for 2 stagger iterations ------------------------------------------------------------------------- */ -void Grid3d::set_shift_atom(double shift) +void Grid3d::set_shift_atom(double shift_lo, double shift_hi) { - shift_atom = shift; + shift_atom_lo = shift_lo; + shift_atom_hi = shift_hi; } /* ---------------------------------------------------------------------- - zfactor > 1.0 when grid extends beyond simulation box bounds in Z - used by KSpace PPPM for 2d periodic slab geometries - default = 1.0 (no extension in Z) + enable extra grid cells in Z + factor = muliplication factor on box size Z and thus grid size + factor > 1.0 when grid extends beyond Z box size (3.0 = tripled in size) + used by PPPM for 2d periodic slab geometries + default zextra = 0, factor = 1.0 (no extra grid cells in Z) ------------------------------------------------------------------------- */ void Grid3d::set_zfactor(double factor) { + if (factor == 1.0) zextra = 0; + else zextra = 1; zfactor = factor; } +/* ---------------------------------------------------------------------- + set IDs of proc neighbors used in uniform local owned/ghost comm + call this AFTER setup_grid() but BEFORE setup_comm() to override + the processor neighbors stored by extract_comm() + used by MSM to exclude non-participating procs for coarse grid comm +------------------------------------------------------------------------- */ + +void Grid3d::set_proc_neighs(int pxlo, int pxhi, int pylo, int pyhi, + int pzlo, int pzhi) +{ + procxlo = pxlo; + procxhi = pxhi; + procylo = pylo; + procyhi = pyhi; + proczlo = pzlo; + proczhi = pzhi; +} + +/* ---------------------------------------------------------------------- + set allocation dimensions of caller grid used by indices() to setup pack/unpack + call this AFTER setup_grid() but BEFORE setup_comm() to override + the caller grid size set by setup_grid() + used by MSM to allow a larger level 0 grid to be allocated + with more ghost cells for other operations +------------------------------------------------------------------------- */ + +void Grid3d::set_larger_grid(int fxlo, int fxhi, int fylo, int fyhi, + int fzlo, int fzhi) +{ + fullxlo = fxlo; + fullxhi = fxhi; + fullylo = fylo; + fullyhi = fyhi; + fullzlo = fzlo; + fullzhi = fzhi; +} + // ---------------------------------------------------------------------- // retrieve Grid parameters // ---------------------------------------------------------------------- @@ -454,7 +347,7 @@ void Grid3d::setup_grid(int &ixlo, int &ixhi, int &iylo, int &iyhi, // default = caller grid is allocated to ghost grid // used when computing pack/unpack lists in indices() - // NOTE: allow caller to override this + // these values can be overridden by caller using set_larger_grid() fullxlo = outxlo; fullxhi = outxhi; @@ -577,23 +470,24 @@ void Grid3d::ghost_grid() double dzinv = nz / prd[2]; int lo, hi; - lo = static_cast((sublo[0]-dist[0]-boxlo[0]) * dxinv + shift_atom + OFFSET) - OFFSET; - hi = static_cast((subhi[0]+dist[0]-boxlo[0]) * dxinv + shift_atom + OFFSET) - OFFSET; + lo = static_cast((sublo[0]-dist[0]-boxlo[0]) * dxinv + shift_atom_lo + OFFSET) - OFFSET; + hi = static_cast((subhi[0]+dist[0]-boxlo[0]) * dxinv + shift_atom_hi + OFFSET) - OFFSET; outxlo = MIN(lo - stencil_atom_lo, inxlo - stencil_grid_lo); outxhi = MAX(hi + stencil_atom_hi, inxhi + stencil_grid_hi); - lo = static_cast((sublo[1]-dist[1]-boxlo[1]) * dyinv + shift_atom + OFFSET) - OFFSET; - hi = static_cast((subhi[1]+dist[1]-boxlo[1]) * dyinv + shift_atom + OFFSET) - OFFSET; - outxlo = MIN(lo - stencil_atom_lo, inylo - stencil_grid_lo); - outxhi = MAX(hi + stencil_atom_hi, inyhi + stencil_grid_hi); + lo = static_cast((sublo[1]-dist[1]-boxlo[1]) * dyinv + shift_atom_lo + OFFSET) - OFFSET; + hi = static_cast((subhi[1]+dist[1]-boxlo[1]) * dyinv + shift_atom_hi + OFFSET) - OFFSET; + outylo = MIN(lo - stencil_atom_lo, inylo - stencil_grid_lo); + outyhi = MAX(hi + stencil_atom_hi, inyhi + stencil_grid_hi); - // NOTE: need to account for zfactor here + if (zextra == 0) { + lo = static_cast((sublo[2]-dist[2]-boxlo[2]) * dzinv + shift_atom_lo + OFFSET) - OFFSET; + hi = static_cast((subhi[2]+dist[2]-boxlo[2]) * dzinv + shift_atom_hi + OFFSET) - OFFSET; + outzlo = MIN(lo - stencil_atom_lo, inzlo - stencil_grid_lo); + outzhi = MAX(hi + stencil_atom_hi, inzhi + stencil_grid_hi); + } else { + } - lo = static_cast((sublo[2]-dist[2]-boxlo[2]) * dzinv + shift_atom + OFFSET) - OFFSET; - hi = static_cast((subhi[2]+dist[2]-boxlo[2]) * dzinv + shift_atom + OFFSET) - OFFSET; - outxlo = MIN(lo - stencil_atom_lo, inzlo - stencil_grid_lo); - outxhi = MAX(hi + stencil_atom_hi, inzhi + stencil_grid_hi); - // limit out xyz lo/hi indices to global grid for non-periodic dims int *periodicity = domain->periodicity; @@ -612,6 +506,45 @@ void Grid3d::ghost_grid() outzlo = MAX(0,outzlo); outzhi = MIN(nz-1,outzhi); } + + + // NOTE: zperiod effects, also in owned grid cells ? + + /* + + // extent of zprd when 2d slab mode is selected + + double zprd_slab = zprd*slab_volfactor; + + // for slab PPPM, assign z grid as if it were not extended + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + // also insure no other procs use ghost cells beyond +z limit + // differnet logic for non-tiled vs tiled decomposition + + if (slabflag == 1) { + if (comm->layout != Comm::LAYOUT_TILED) { + if (comm->myloc[2] == comm->procgrid[2]-1) nzhi_in = nzhi_out = nz_pppm - 1; + } else { + if (comm->mysplit[2][1] == 1.0) nzhi_in = nzhi_out = nz_pppm - 1; + } + nzhi_out = MIN(nzhi_out,nz_pppm-1); + } + + */ + } /* ---------------------------------------------------------------------- @@ -625,7 +558,7 @@ void Grid3d::extract_comm_info() // for non TILED layout: // proc xyz lohi = my 6 neighbor procs in this MPI_Comm - // NOTE: will need special logic for MSM case with different MPI_Comm + // these proc IDs can be overridden by caller using set_proc_neighs() // xyz split = copy of 1d vectors in Comm // grid2proc = copy of 3d array in Comm @@ -2095,7 +2028,7 @@ int Grid3d::proc_index_uniform(int igrid, int n, int dim, double *split) double fraclo,frachi; // loop over # of procs in this dime - // compute the grid bounds for that proc, same as comm->partition_grid() + // compute the grid bounds for that proc // if igrid falls within those bounds, return m = proc index int m; diff --git a/src/grid3d.h b/src/grid3d.h index 8874102590..39734a482f 100644 --- a/src/grid3d.h +++ b/src/grid3d.h @@ -23,21 +23,16 @@ class Grid3d : protected Pointers { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes Grid3d(class LAMMPS *, MPI_Comm, int, int, int); - - Grid3d(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, - int, int, int, int, int, int); - Grid3d(class LAMMPS *, MPI_Comm, int, int, int, int, - int, int, int, int, int, int, int, int, int, int, int, int, - int, int, int, int, int, int); - - ~Grid3d() override; + ~Grid3d(); void set_distance(double); void set_stencil_grid(int, int); void set_stencil_atom(int, int); void set_shift_grid(double); - void set_shift_atom(double); + void set_shift_atom(double, double); void set_zfactor(double); + void set_larger_grid(int, int, int, int, int, int); + void set_proc_neighs(int, int, int, int, int, int); int identical(Grid3d *); void get_size(int &, int &, int &); @@ -72,10 +67,12 @@ class Grid3d : protected Pointers { int stencil_grid_lo,stencil_grid_hi; // grid cells accessed beyond owned cell double shift_grid; // location of grid point within grid cell // only affects which proc owns grid cell - double shift_atom; // shift applied to atomd when mapped to grid cell by caller + double shift_atom_lo,shift_atom_hi;; // max shift applied to atoms + // when mapped to grid cell by caller + // can be different in lo/hi directions // only affects extent of ghost cells - double zfactor; // multiplier on extend of grid in Z direction - // 1.0 = no extra grid cells in Z + int zextra; // 1 if extra grid cells in Z, 0 if not + double zfactor; // multiplier on extent of grid in Z direction // extent of my owned and ghost cells