/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Ludwig Ahrens-Iwers (TUHH), Shern Tee (UQ), Robert Meißner (TUHH) ------------------------------------------------------------------------- */ #include "pppm_electrode.h" #include "angle.h" #include "atom.h" #include "bond.h" #include "citeme.h" #include "comm.h" #include "domain.h" #include "error.h" #include "fft3d_wrap.h" #include "force.h" #include "gridcomm.h" #include "math_const.h" #include "math_special.h" #include "memory.h" #include "neighbor.h" #include "pair.h" #include "remap_wrap.h" #include "slab_dipole.h" #include "update.h" #include "wire_dipole.h" #include #include #include using namespace LAMMPS_NS; using namespace MathConst; using namespace MathSpecial; #define MAXORDER 7 #define OFFSET 16384 #define LARGE 10000.0 #define SMALL 0.00001 #define EPS_HOC 1.0e-7 enum { REVERSE_RHO }; enum { FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM }; #ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else #define ZEROF 0.0 #define ONEF 1.0 #endif static const char cite_pppm_electrode[] = "kspace_style pppm/electrode command:\n\n" "@article{Ahrens2021,\n" "author = {Ahrens-Iwers, Ludwig J.V. and Mei{\\ss}ner, Robert H.},\n" "doi = {10.1063/5.0063381},\n" "title = {{Constant potential simulations on a mesh}},\n" "journal = {Journal of Chemical Physics},\n" "year = {2021}\n" "volume = {155},\n" "pages = {104104},\n" "}\n"; /* ---------------------------------------------------------------------- */ PPPMElectrode::PPPMElectrode(LAMMPS *lmp) : PPPM(lmp), electrolyte_density_brick(nullptr), electrolyte_density_fft(nullptr), boundcorr(nullptr) { if (lmp->citeme) lmp->citeme->add(cite_pppm_electrode); group_group_enable = 0; electrolyte_density_brick = nullptr; electrolyte_density_fft = nullptr; compute_vector_called = false; last_source_grpbit = 1 << 0; // initialize to "all" last_invert_source = false; // not sure what to initialize here } /* ---------------------------------------------------------------------- free all memory ------------------------------------------------------------------------- */ PPPMElectrode::~PPPMElectrode() { if (copymode) return; deallocate(); if (peratom_allocate_flag) deallocate_peratom(); if (group_allocate_flag) deallocate_groups(); memory->destroy(part2grid); memory->destroy(acons); } /* ---------------------------------------------------------------------- called once before run ------------------------------------------------------------------------- */ void PPPMElectrode::init() { if (me == 0) utils::logmesg(lmp, "PPPM/electrode initialization ...\n"); // error check if (slabflag == 3) error->all(FLERR, "Cannot (yet) use PPPM/electrode with 'kspace_modify slab ew2d'"); triclinic_check(); triclinic = domain->triclinic; if (triclinic) error->all(FLERR, "Cannot (yet) use PPPM/electrode with triclinic box "); if (domain->dimension == 2) error->all(FLERR, "Cannot use PPPM/electrode with 2d simulation"); if (!atom->q_flag) error->all(FLERR, "KSpace style requires atom attribute q"); if (slabflag == 0 && wireflag == 0 && domain->nonperiodic > 0) error->all(FLERR, "Cannot use non-periodic boundaries with PPPM/electrode"); if (slabflag) { if (domain->xperiodic != 1 || domain->yperiodic != 1 || domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) error->all(FLERR, "Incorrect boundaries with slab PPPM/electrode"); } else if (wireflag) { if (domain->zperiodic != 1 || domain->boundary[0][0] != 1 || domain->boundary[0][1] != 1 || domain->boundary[1][0] != 1 || domain->boundary[1][1] != 1) error->all(FLERR, "Incorrect boundaries with wire PPPM/electrode"); } if (order < 2 || order > MAXORDER) error->all(FLERR, fmt::format("PPPM/electrode order cannot be < 2 or > {}", MAXORDER)); // compute two charge force two_charge(); // extract short-range Coulombic cutoff from pair style pair_check(); int itmp = 0; double *p_cutoff = (double *) force->pair->extract("cut_coul", itmp); if (p_cutoff == nullptr) error->all(FLERR, "KSpace style is incompatible with Pair style"); cutoff = *p_cutoff; // if kspace is TIP4P, extract TIP4P params from pair style // bond/angle are not yet init(), so insure equilibrium request is valid qdist = 0.0; if (tip4pflag) { if (me == 0) utils::logmesg(lmp, " extracting TIP4P info from pair style\n"); double *p_qdist = (double *) force->pair->extract("qdist", itmp); int *p_typeO = (int *) force->pair->extract("typeO", itmp); int *p_typeH = (int *) force->pair->extract("typeH", itmp); int *p_typeA = (int *) force->pair->extract("typeA", itmp); int *p_typeB = (int *) force->pair->extract("typeB", itmp); if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) error->all(FLERR, "Pair style is incompatible with TIP4P KSpace style"); qdist = *p_qdist; typeO = *p_typeO; typeH = *p_typeH; int typeA = *p_typeA; int typeB = *p_typeB; if (force->angle == nullptr || force->bond == nullptr || force->angle->setflag == nullptr || force->bond->setflag == nullptr) error->all(FLERR, "Bond and angle potentials must be defined for TIP4P"); if (typeA < 1 || typeA > atom->nangletypes || force->angle->setflag[typeA] == 0) error->all(FLERR, "Bad TIP4P angle type for PPPM/TIP4P"); if (typeB < 1 || typeB > atom->nbondtypes || force->bond->setflag[typeB] == 0) error->all(FLERR, "Bad TIP4P bond type for PPPM/TIP4P"); double theta = force->angle->equilibrium_angle(typeA); double blen = force->bond->equilibrium_distance(typeB); alpha = qdist / (cos(0.5 * theta) * blen); } // compute qsum & qsqsum and warn if not charge-neutral scale = 1.0; qqrd2e = force->qqrd2e; qsum_qsq(); natoms_original = atom->natoms; // set accuracy (force units) from accuracy_relative or accuracy_absolute if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; else accuracy = accuracy_relative * two_charge_force; // free all arrays previously allocated deallocate(); delete fft1; delete fft2; delete remap; delete gc; if (peratom_allocate_flag) deallocate_peratom(); if (group_allocate_flag) deallocate_groups(); // setup FFT grid resolution and g_ewald // normally one iteration thru while loop is all that is required // if grid stencil does not extend beyond neighbor proc // or overlap is allowed, then done // else reduce order and try again GridComm *gctmp = nullptr; int iteration = 0; while (order >= minorder) { if (iteration && me == 0) error->warning(FLERR, "Reducing PPPM/electrode order b/c stencil extends " "beyond nearest neighbor processor"); if (stagger_flag && !differentiation_flag) compute_gf_denom(); set_grid_global(); set_grid_local(); if (overlap_allowed) break; gctmp = new GridComm(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out); int tmp1, tmp2; gctmp->setup(tmp1, tmp2); if (gctmp->ghost_adjacent()) break; delete gctmp; order--; iteration++; } if (order < minorder) error->all(FLERR, "PPPM/electrode order < minimum allowed order"); if (!overlap_allowed && !gctmp->ghost_adjacent()) error->all(FLERR, "PPPM/electrode grid stencil extends beyond nearest neighbor processor"); if (gctmp) delete gctmp; // adjust g_ewald if (!gewaldflag) adjust_gewald(); // calculate the final accuracy double estimated_accuracy = final_accuracy(); // print stats int ngrid_max, nfft_both_max; MPI_Allreduce(&ngrid, &ngrid_max, 1, MPI_INT, MPI_MAX, world); MPI_Allreduce(&nfft_both, &nfft_both_max, 1, MPI_INT, MPI_MAX, world); if (me == 0) { std::string mesg = fmt::format(" G vector (1/distance) = {:.8g}\n", g_ewald); mesg += fmt::format(" grid = {} {} {}\n", nx_pppm, ny_pppm, nz_pppm); mesg += fmt::format(" stencil order = {}\n", order); mesg += fmt::format(" estimated absolute RMS force accuracy = {:.8g}\n", estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy / two_charge_force); mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max, nfft_both_max); utils::logmesg(lmp, mesg); } // allocate K-space dependent memory // don't invoke allocate peratom() or group(), will be allocated when needed allocate(); // pre-compute Green's function denomiator expansion // pre-compute 1d charge distribution coefficients compute_gf_denom(); if (differentiation_flag == 1) compute_sf_precoeff(); compute_rho_coeff(); compute_step = -1; } /* ---------------------------------------------------------------------- adjust PPPM coeffs, called initially and whenever volume has changed ------------------------------------------------------------------------- */ void PPPMElectrode::setup() { // perform some checks to avoid illegal boundaries with read_data if (slabflag == 0 && wireflag == 0 && domain->nonperiodic > 0) error->all(FLERR, "Cannot use non-periodic boundaries with PPPM/electrode"); if (slabflag) { if (domain->xperiodic != 1 || domain->yperiodic != 1 || domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) error->all(FLERR, "Incorrect boundaries with slab PPPM/electrode"); } else if (wireflag) { if (domain->zperiodic != 1 || domain->boundary[0][0] != 1 || domain->boundary[0][1] != 1 || domain->boundary[1][0] != 1 || domain->boundary[1][1] != 1) error->all(FLERR, "Incorrect boundaries with wire PPPM/electrode"); } int i, j, k, n; double *prd; // volume-dependent factors // adjust z dimension for 2d slab PPPM // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 prd = domain->prd; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double xprd_wire = xprd * wire_volfactor; double yprd_wire = yprd * wire_volfactor; double zprd_slab = zprd * slab_volfactor; volume = xprd_wire * yprd_wire * zprd_slab; delxinv = nx_pppm / xprd_wire; delyinv = ny_pppm / yprd_wire; delzinv = nz_pppm / zprd_slab; delvolinv = delxinv * delyinv * delzinv; double unitkx = (MY_2PI / xprd_wire); double unitky = (MY_2PI / yprd_wire); double unitkz = (MY_2PI / zprd_slab); // fkx,fky,fkz for my FFT grid pts for (i = nxlo_fft; i <= nxhi_fft; i++) { int per = i - nx_pppm * (2 * i / nx_pppm); fkx[i] = unitkx * per; } for (i = nylo_fft; i <= nyhi_fft; i++) { int per = i - ny_pppm * (2 * i / ny_pppm); fky[i] = unitky * per; } for (i = nzlo_fft; i <= nzhi_fft; i++) { int per = i - nz_pppm * (2 * i / nz_pppm); fkz[i] = unitkz * per; } // virial coefficients double sqk, vterm; n = 0; for (k = nzlo_fft; k <= nzhi_fft; k++) { for (j = nylo_fft; j <= nyhi_fft; j++) { for (i = nxlo_fft; i <= nxhi_fft; i++) { sqk = fkx[i] * fkx[i] + fky[j] * fky[j] + fkz[k] * fkz[k]; if (sqk == 0.0) { vg[n][0] = 0.0; vg[n][1] = 0.0; vg[n][2] = 0.0; vg[n][3] = 0.0; vg[n][4] = 0.0; vg[n][5] = 0.0; } else { vterm = -2.0 * (1.0 / sqk + 0.25 / (g_ewald * g_ewald)); vg[n][0] = 1.0 + vterm * fkx[i] * fkx[i]; vg[n][1] = 1.0 + vterm * fky[j] * fky[j]; vg[n][2] = 1.0 + vterm * fkz[k] * fkz[k]; vg[n][3] = vterm * fkx[i] * fky[j]; vg[n][4] = vterm * fkx[i] * fkz[k]; vg[n][5] = vterm * fky[j] * fkz[k]; } n++; } } } if (differentiation_flag == 1) compute_gf_ad(); else compute_gf_ik(); } /* ---------------------------------------------------------------------- reset local grid arrays and communication stencils called by fix balance b/c it changed sizes of processor sub-domains ------------------------------------------------------------------------- */ void PPPMElectrode::setup_grid() { // free all arrays previously allocated deallocate(); if (peratom_allocate_flag) deallocate_peratom(); if (group_allocate_flag) deallocate_groups(); // reset portion of global grid that each proc owns set_grid_local(); // reallocate K-space dependent memory // check if grid communication is now overlapping if not allowed // don't invoke allocate peratom() or group(), will be allocated when needed allocate(); if (!overlap_allowed && !gc->ghost_adjacent()) error->all(FLERR, "PPPM/electrode grid stencil extends " "beyond nearest neighbor processor"); // pre-compute Green's function denomiator expansion // pre-compute 1d charge distribution coefficients compute_gf_denom(); if (differentiation_flag == 1) compute_sf_precoeff(); compute_rho_coeff(); // pre-compute volume-dependent coeffs for portion of grid I now own setup(); } /* ---------------------------------------------------------------------- compute the PPPM long-range force, energy, virial ------------------------------------------------------------------------- */ void PPPMElectrode::compute(int eflag, int vflag) { int i, j; // set energy/virial flags // invoke allocate_peratom() if needed for first time ev_init(eflag, vflag); if (evflag_atom && !peratom_allocate_flag) allocate_peratom(); // if atom count has changed, update qsum and qsqsum qsum_qsq(); natoms_original = atom->natoms; // return if there are no charges start_compute(); if (compute_vector_called && last_invert_source) { // electrolyte_density_brick is filled, so we can grab only electrode atoms. // Does not work for direct cg algorithm because electrode charges change after compute_vector. // Therefore, only when last_invert_source true. // TODO: this is dangerous now that compute_vector's interface has been // changed since a compute could call an arbitrary source, needs tightening make_rho_in_brick(last_source_grpbit, density_brick, !last_invert_source); gc->reverse_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), REVERSE_RHO, gc_buf1, gc_buf2, MPI_FFT_SCALAR); for (int nz = nzlo_out; nz <= nzhi_out; nz++) for (int ny = nylo_out; ny <= nyhi_out; ny++) for (int nx = nxlo_out; nx <= nxhi_out; nx++) { density_brick[nz][ny][nx] += electrolyte_density_brick[nz][ny][nx]; } } else { make_rho(); // all procs communicate density values from their ghost cells // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition gc->reverse_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), REVERSE_RHO, gc_buf1, gc_buf2, MPI_FFT_SCALAR); } brick2fft(); // compute potential gradient on my FFT grid and // portion of e_long on this proc's FFT grid // return gradients (electric fields) in 3d brick decomposition // also performs per-atom calculations via poisson_peratom() poisson(); // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) gc->forward_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), FORWARD_AD, gc_buf1, gc_buf2, MPI_FFT_SCALAR); else gc->forward_comm(GridComm::KSPACE, this, 3, sizeof(FFT_SCALAR), FORWARD_IK, gc_buf1, gc_buf2, MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) { if (differentiation_flag == 1 && vflag_atom) gc->forward_comm(GridComm::KSPACE, this, 6, sizeof(FFT_SCALAR), FORWARD_AD_PERATOM, gc_buf1, gc_buf2, MPI_FFT_SCALAR); else if (differentiation_flag == 0) gc->forward_comm(GridComm::KSPACE, this, 7, sizeof(FFT_SCALAR), FORWARD_IK_PERATOM, gc_buf1, gc_buf2, MPI_FFT_SCALAR); } // calculate the force on my particles fieldforce(); // extra per-atom energy/virial communication if (evflag_atom) fieldforce_peratom(); // sum global energy across procs and add in volume-dependent term const double qscale = qqrd2e * scale; if (eflag_global) { double energy_all; MPI_Allreduce(&energy, &energy_all, 1, MPI_DOUBLE, MPI_SUM, world); energy = energy_all; energy *= 0.5 * volume; energy -= g_ewald * qsqsum / MY_PIS + MY_PI2 * qsum * qsum / (g_ewald * g_ewald * volume); energy *= qscale; } // sum global virial across procs if (vflag_global) { double virial_all[6]; MPI_Allreduce(virial, virial_all, 6, MPI_DOUBLE, MPI_SUM, world); for (i = 0; i < 6; i++) virial[i] = 0.5 * qscale * volume * virial_all[i]; } // per-atom energy/virial // energy includes self-energy correction // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms if (evflag_atom) { double *q = atom->q; int nlocal = atom->nlocal; int ntotal = nlocal; if (tip4pflag) ntotal += atom->nghost; if (eflag_atom) { for (i = 0; i < nlocal; i++) { eatom[i] *= 0.5; eatom[i] -= g_ewald * q[i] * q[i] / MY_PIS + MY_PI2 * q[i] * qsum / (g_ewald * g_ewald * volume); eatom[i] *= qscale; } for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5 * qscale; } if (vflag_atom) { for (i = 0; i < ntotal; i++) for (j = 0; j < 6; j++) vatom[i][j] *= 0.5 * qscale; } } boundcorr->compute_corr(qsum, eflag_atom, eflag_global, energy, eatom); compute_vector_called = false; } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ void PPPMElectrode::start_compute() { if (compute_step < update->ntimestep) { if (compute_step == -1) setup(); boxlo = domain->boxlo; // extend size of per-atom arrays if necessary if (atom->nmax > nmax) { memory->destroy(part2grid); nmax = atom->nmax; memory->create(part2grid, nmax, 3, "pppm/electrode:part2grid"); } // find grid points for all my particles // map my particle charge onto my local 3d density grid particle_map(); compute_step = update->ntimestep; } } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ void PPPMElectrode::compute_vector(double *vec, int sensor_grpbit, int source_grpbit, bool invert_source) { start_compute(); // temporarily store and switch pointers so we can use brick2fft() for // electrolyte density (without writing an additional function) FFT_SCALAR ***density_brick_real = density_brick; FFT_SCALAR *density_fft_real = density_fft; make_rho_in_brick(source_grpbit, electrolyte_density_brick, invert_source); density_brick = electrolyte_density_brick; density_fft = electrolyte_density_fft; gc->reverse_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), REVERSE_RHO, gc_buf1, gc_buf2, MPI_FFT_SCALAR); brick2fft(); // switch back pointers density_brick = density_brick_real; density_fft = density_fft_real; // transform electrolyte charge density (r -> k) (complex conjugate) for (int i = 0, n = 0; i < nfft; i++) { work1[n++] = electrolyte_density_fft[i]; work1[n++] = ZEROF; } fft1->compute(work1, work1, -1); // k->r FFT of Green's * electrolyte density = u_brick for (int i = 0, n = 0; i < nfft; i++) { work2[n] = work1[n] * greensfn[i]; n++; work2[n] = work1[n] * greensfn[i]; n++; } fft2->compute(work2, work2, 1); for (int k = nzlo_in, n = 0; k <= nzhi_in; k++) for (int j = nylo_in; j <= nyhi_in; j++) for (int i = nxlo_in; i <= nxhi_in; i++) { u_brick[k][j][i] = work2[n]; n += 2; } gc->forward_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), FORWARD_AD, gc_buf1, gc_buf2, MPI_FFT_SCALAR); project_psi(vec, sensor_grpbit); compute_vector_called = true; } void PPPMElectrode::project_psi(double *vec, int sensor_grpbit) { // project u_brick with weight matrix double **x = atom->x; int *mask = atom->mask; double const scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm); for (int i = 0; i < atom->nlocal; i++) { if (!(mask[i] & sensor_grpbit)) continue; double v = 0.; // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt int nix = part2grid[i][0]; int niy = part2grid[i][1]; int niz = part2grid[i][2]; FFT_SCALAR dix = nix + shiftone - (x[i][0] - boxlo[0]) * delxinv; FFT_SCALAR diy = niy + shiftone - (x[i][1] - boxlo[1]) * delyinv; FFT_SCALAR diz = niz + shiftone - (x[i][2] - boxlo[2]) * delzinv; compute_rho1d(dix, diy, diz); for (int ni = nlower; ni <= nupper; ni++) { double iz0 = rho1d[2][ni]; int miz = ni + niz; for (int mi = nlower; mi <= nupper; mi++) { double iy0 = iz0 * rho1d[1][mi]; int miy = mi + niy; for (int li = nlower; li <= nupper; li++) { int mix = li + nix; double ix0 = iy0 * rho1d[0][li]; v += ix0 * u_brick[miz][miy][mix]; } } } vec[i] += v * scaleinv; } } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ void PPPMElectrode::compute_matrix(bigint *imat, double **matrix, bool timer_flag) { compute(1, 0); // make sure density bricks etc. are set up // fft green's function k -> r (double) double *greens_real; memory->create(greens_real, nz_pppm * ny_pppm * nx_pppm, "pppm/electrode:greens_real"); memset(greens_real, 0, (std::size_t)nz_pppm * (std::size_t)ny_pppm * (std::size_t)nx_pppm * sizeof(double)); for (int i = 0, n = 0; i < nfft; i++) { work2[n++] = greensfn[i]; work2[n++] = ZEROF; } fft2->compute(work2, work2, -1); for (int k = nzlo_in, n = 0; k <= nzhi_in; k++) for (int j = nylo_in; j <= nyhi_in; j++) for (int i = nxlo_in; i <= nxhi_in; i++) { greens_real[ny_pppm * nx_pppm * k + nx_pppm * j + i] = work2[n]; n += 2; } MPI_Allreduce(MPI_IN_PLACE, greens_real, nz_pppm * ny_pppm * nx_pppm, MPI_DOUBLE, MPI_SUM, world); int const nlocal = atom->nlocal; int nmat = std::count_if(&imat[0], &imat[nlocal], [](int x) { return x >= 0; }); MPI_Allreduce(MPI_IN_PLACE, &nmat, 1, MPI_INT, MPI_SUM, world); // gather x_ele double **x_ele; memory->create(x_ele, nmat, 3, "pppm/electrode:x_ele"); memset(&(x_ele[0][0]), 0, nmat * 3 * sizeof(double)); double **x = atom->x; for (int i = 0; i < nlocal; i++) { int ipos = imat[i]; if (ipos < 0) continue; for (int dim = 0; dim < 3; dim++) x_ele[ipos][dim] = x[i][dim]; } MPI_Allreduce(MPI_IN_PLACE, &(x_ele[0][0]), nmat * 3, MPI_DOUBLE, MPI_SUM, world); if (conp_one_step) one_step_multiplication(imat, greens_real, x_ele, matrix, nmat, timer_flag); else two_step_multiplication(imat, greens_real, x_ele, matrix, nmat, timer_flag); memory->destroy(greens_real); memory->destroy(x_ele); } /* ----------------------------------------------------------------------*/ void PPPMElectrode::one_step_multiplication(bigint *imat, double *greens_real, double **x_ele, double **matrix, int const nmat, bool timer_flag) { // map green's function in real space from mesh to particle positions // with matrix multiplication 'W^T G W' in one steps. Uses less memory than // two_step_multiplication // int const nlocal = atom->nlocal; double **x = atom->x; MPI_Barrier(world); double step1_time = MPI_Wtime(); // precalculate rho_1d for local electrode std::vector j_list; for (int j = 0; j < nlocal; j++) { int jpos = imat[j]; if (jpos < 0) continue; j_list.push_back(j); } int const nj_local = j_list.size(); FFT_SCALAR ***rho1d_j; memory->create(rho1d_j, nj_local, 3, order, "pppm/electrode:rho1d_j"); for (int jlist_pos = 0; jlist_pos < nj_local; jlist_pos++) { int j = j_list[jlist_pos]; int njx = part2grid[j][0]; int njy = part2grid[j][1]; int njz = part2grid[j][2]; FFT_SCALAR const djx = njx + shiftone - (x[j][0] - boxlo[0]) * delxinv; FFT_SCALAR const djy = njy + shiftone - (x[j][1] - boxlo[1]) * delyinv; FFT_SCALAR const djz = njz + shiftone - (x[j][2] - boxlo[2]) * delzinv; compute_rho1d(djx, djy, djz); for (int dim = 0; dim < 3; dim++) { for (int oi = 0; oi < order; oi++) { rho1d_j[jlist_pos][dim][oi] = rho1d[dim][oi + nlower]; } } } // nested loops over weights of electrode atoms i and j // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt int const order2 = order * order; int const order6 = order2 * order2 * order2; double *amesh; memory->create(amesh, order6, "pppm/electrode:amesh"); for (int ipos = 0; ipos < nmat; ipos++) { double *_noalias xi_ele = x_ele[ipos]; // new calculation for nx, ny, nz because part2grid available for nlocal, // only int nix = static_cast((xi_ele[0] - boxlo[0]) * delxinv + shift) - OFFSET; int niy = static_cast((xi_ele[1] - boxlo[1]) * delyinv + shift) - OFFSET; int niz = static_cast((xi_ele[2] - boxlo[2]) * delzinv + shift) - OFFSET; FFT_SCALAR const dix = nix + shiftone - (xi_ele[0] - boxlo[0]) * delxinv; FFT_SCALAR const diy = niy + shiftone - (xi_ele[1] - boxlo[1]) * delyinv; FFT_SCALAR const diz = niz + shiftone - (xi_ele[2] - boxlo[2]) * delzinv; compute_rho1d(dix, diy, diz); int njx = -1; int njy = -1; int njz = -1; // force initial build_amesh for (int jlist_pos = 0; jlist_pos < nj_local; jlist_pos++) { int j = j_list[jlist_pos]; int ind_amesh = 0; int jpos = imat[j]; if ((ipos < jpos) == !((ipos - jpos) % 2)) continue; double aij = 0.; if (njx != part2grid[j][0] || njy != part2grid[j][1] || njz != part2grid[j][2]) { njx = part2grid[j][0]; njy = part2grid[j][1]; njz = part2grid[j][2]; build_amesh(njx - nix, njy - niy, njz - niz, amesh, greens_real); } for (int ni = nlower; ni <= nupper; ni++) { // i's rho1d[dim] indexed from nlower to nupper FFT_SCALAR const iz0 = rho1d[2][ni]; for (int nj = 0; nj < order; nj++) { // j's rho1d_j[][dim] indexed from 0 to order-1 FFT_SCALAR const jz0 = rho1d_j[jlist_pos][2][nj]; for (int mi = nlower; mi <= nupper; mi++) { FFT_SCALAR const iy0 = iz0 * rho1d[1][mi]; for (int mj = 0; mj < order; mj++) { FFT_SCALAR const jy0 = jz0 * rho1d_j[jlist_pos][1][mj]; for (int li = nlower; li <= nupper; li++) { FFT_SCALAR const ix0 = iy0 * rho1d[0][li]; double aij_xscan = 0.; for (int lj = 0; lj < order; lj++) { aij_xscan += (double) amesh[ind_amesh] * rho1d_j[jlist_pos][0][lj]; ind_amesh++; } aij += (double) ix0 * jy0 * aij_xscan; } } } } } matrix[ipos][jpos] += aij / volume; if (ipos != jpos) matrix[jpos][ipos] += aij / volume; } } memory->destroy(amesh); memory->destroy(rho1d_j); MPI_Barrier(world); if (timer_flag && (comm->me == 0)) utils::logmesg(lmp, fmt::format("Single step time: {:.4g} s\n", MPI_Wtime() - step1_time)); } /* ----------------------------------------------------------------------*/ void PPPMElectrode::build_amesh(const int dx, // = njx - nix const int dy, // = njy - niy const int dz, // = njz - niz double *amesh, double *const greens_real) { auto fmod = [](int x, int n) { // fast unsigned mod int r = abs(x); while (r >= n) r -= n; return r; }; int ind_amesh = 0; for (int iz = 0; iz < order; iz++) for (int jz = 0; jz < order; jz++) { int const mz = fmod(dz + jz - iz, nz_pppm) * nx_pppm * ny_pppm; for (int iy = 0; iy < order; iy++) for (int jy = 0; jy < order; jy++) { int const my = fmod(dy + jy - iy, ny_pppm) * nx_pppm; for (int ix = 0; ix < order; ix++) for (int jx = 0; jx < order; jx++) { int const mx = fmod(dx + jx - ix, nx_pppm); amesh[ind_amesh] = greens_real[mz + my + mx]; ind_amesh++; } } } } /* ----------------------------------------------------------------------*/ void PPPMElectrode::two_step_multiplication(bigint *imat, double *greens_real, double **x_ele, double **matrix, int const nmat, bool timer_flag) { // map green's function in real space from mesh to particle positions // with matrix multiplication 'W^T G W' in two steps. gw is result of // first multiplication. int const nlocal = atom->nlocal; MPI_Barrier(world); double step1_time = MPI_Wtime(); int nx_ele = nxhi_out - nxlo_out + 1; // nx_pppm + order + 1; int ny_ele = nyhi_out - nylo_out + 1; // ny_pppm + order + 1; int nz_ele = nzhi_out - nzlo_out + 1; // nz_pppm + order + 1; int nxyz = nx_ele * ny_ele * nz_ele; double **gw; memory->create(gw, nmat, nxyz, "pppm/electrode:gw"); memset(&(gw[0][0]), 0, (std::size_t)nmat * (std::size_t)nxyz * sizeof(double)); auto fmod = [](int x, int n) { // fast unsigned mod int r = abs(x); while (r >= n) r -= n; return r; }; // loops over weights of electrode atoms and weights of complete grid // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt for (int ipos = 0; ipos < nmat; ipos++) { double *_noalias xi_ele = x_ele[ipos]; // new calculation for nx, ny, nz because part2grid available for // nlocal, only int nix = static_cast((xi_ele[0] - boxlo[0]) * delxinv + shift) - OFFSET; int niy = static_cast((xi_ele[1] - boxlo[1]) * delyinv + shift) - OFFSET; int niz = static_cast((xi_ele[2] - boxlo[2]) * delzinv + shift) - OFFSET; FFT_SCALAR dx = nix + shiftone - (xi_ele[0] - boxlo[0]) * delxinv; FFT_SCALAR dy = niy + shiftone - (xi_ele[1] - boxlo[1]) * delyinv; FFT_SCALAR dz = niz + shiftone - (xi_ele[2] - boxlo[2]) * delzinv; compute_rho1d(dx, dy, dz); // DO NOT TOUCH THESE LOOPS! // Attempted optimizations (such as calculating parts of indices // in the outer loops) have been shown harmful upon benchmarking! for (int mjz = nzlo_out; mjz <= nzhi_out; mjz++) { for (int ni = nlower; ni <= nupper; ni++) { double const iz0 = rho1d[2][ni]; int const mz = fmod(mjz - ni - niz, nz_pppm); for (int mjy = nylo_out; mjy <= nyhi_out; mjy++) { for (int mi = nlower; mi <= nupper; mi++) { double const iy0 = iz0 * rho1d[1][mi]; int const my = fmod(mjy - mi - niy, ny_pppm); for (int mjx = nxlo_out; mjx <= nxhi_out; mjx++) { for (int li = nlower; li <= nupper; li++) { double const ix0 = iy0 * rho1d[0][li]; int const mx = fmod(mjx - li - nix, nx_pppm); gw[ipos][nx_ele * ny_ele * (mjz - nzlo_out) + nx_ele * (mjy - nylo_out) + (mjx - nxlo_out)] += ix0 * greens_real[mz * nx_pppm * ny_pppm + my * nx_pppm + mx]; } } } } } } } MPI_Barrier(world); if (timer_flag && (comm->me == 0)) utils::logmesg(lmp, fmt::format("step 1 time: {:.4g} s\n", MPI_Wtime() - step1_time)); // nested loop over electrode atoms i and j and stencil of i // in theory could reuse make_rho1d_j here -- but this step is already // super-fast double step2_time = MPI_Wtime(); double **x = atom->x; for (int i = 0; i < nlocal; i++) { int ipos = imat[i]; if (ipos < 0) continue; int nix = part2grid[i][0]; int niy = part2grid[i][1]; int niz = part2grid[i][2]; FFT_SCALAR dix = nix + shiftone - (x[i][0] - boxlo[0]) * delxinv; FFT_SCALAR diy = niy + shiftone - (x[i][1] - boxlo[1]) * delyinv; FFT_SCALAR diz = niz + shiftone - (x[i][2] - boxlo[2]) * delzinv; compute_rho1d(dix, diy, diz); for (int jpos = 0; jpos < nmat; jpos++) { double aij = 0.; for (int ni = nlower; ni <= nupper; ni++) { double iz0 = rho1d[2][ni]; int miz = ni + niz; for (int mi = nlower; mi <= nupper; mi++) { double iy0 = iz0 * rho1d[1][mi]; int miy = mi + niy; for (int li = nlower; li <= nupper; li++) { int mix = li + nix; double ix0 = iy0 * rho1d[0][li]; int miz0 = miz - nzlo_out; int miy0 = miy - nylo_out; int mix0 = mix - nxlo_out; aij += ix0 * gw[jpos][nx_ele * ny_ele * miz0 + nx_ele * miy0 + mix0]; } } } matrix[ipos][jpos] += aij / volume; } } MPI_Barrier(world); memory->destroy(gw); if (timer_flag && (comm->me == 0)) utils::logmesg(lmp, fmt::format("step 2 time: {:.4g} s\n", MPI_Wtime() - step2_time)); } /* ---------------------------------------------------------------------- allocate memory that depends on # of K-vectors and order ------------------------------------------------------------------------- */ void PPPMElectrode::allocate() { if (slabflag == 1) { // EW3Dc dipole correction boundcorr = new SlabDipole(lmp); } else if (wireflag == 1) { // EW3Dc wire correction boundcorr = new WireDipole(lmp); } else { // dummy BoundaryCorrection for ffield boundcorr = new BoundaryCorrection(lmp); } memory->create3d_offset(electrolyte_density_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm/electrode:electrolyte_density_brick"); memory->create(electrolyte_density_fft, nfft_both, "pppm/electrode:electrolyte_density_fft"); PPPM::allocate(); if (differentiation_flag != 1) memory->create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:u_brick"); } /* ---------------------------------------------------------------------- deallocate memory that depends on # of K-vectors and order ------------------------------------------------------------------------- */ void PPPMElectrode::deallocate() { if (boundcorr != nullptr) delete boundcorr; memory->destroy3d_offset(electrolyte_density_brick, nzlo_out, nylo_out, nxlo_out); memory->destroy(electrolyte_density_fft); memory->destroy3d_offset(density_brick, nzlo_out, nylo_out, nxlo_out); memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); if (differentiation_flag == 1) { memory->destroy(sf_precoeff1); memory->destroy(sf_precoeff2); memory->destroy(sf_precoeff3); memory->destroy(sf_precoeff4); memory->destroy(sf_precoeff5); memory->destroy(sf_precoeff6); } else { memory->destroy3d_offset(vdx_brick, nzlo_out, nylo_out, nxlo_out); memory->destroy3d_offset(vdy_brick, nzlo_out, nylo_out, nxlo_out); memory->destroy3d_offset(vdz_brick, nzlo_out, nylo_out, nxlo_out); } memory->destroy(density_fft); memory->destroy(greensfn); memory->destroy(work1); memory->destroy(work2); memory->destroy(vg); memory->destroy1d_offset(fkx, nxlo_fft); memory->destroy1d_offset(fky, nylo_fft); memory->destroy1d_offset(fkz, nzlo_fft); memory->destroy(gf_b); if (stagger_flag) gf_b = nullptr; memory->destroy2d_offset(rho1d, -order_allocated / 2); memory->destroy2d_offset(drho1d, -order_allocated / 2); memory->destroy2d_offset(rho_coeff, (1 - order_allocated) / 2); memory->destroy2d_offset(drho_coeff, (1 - order_allocated) / 2); memory->destroy(gc_buf1); memory->destroy(gc_buf2); } void PPPMElectrode::allocate_peratom() { peratom_allocate_flag = 1; memory->create3d_offset(v0_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v0_brick"); memory->create3d_offset(v1_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v1_brick"); memory->create3d_offset(v2_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v2_brick"); memory->create3d_offset(v3_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v3_brick"); memory->create3d_offset(v4_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v4_brick"); memory->create3d_offset(v5_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out, "pppm:v5_brick"); // use same GC ghost grid object for peratom grid communication // but need to reallocate a larger gc_buf1 and gc_buf2 if (differentiation_flag) npergrid = 6; else npergrid = 7; memory->destroy(gc_buf1); memory->destroy(gc_buf2); memory->create(gc_buf1, npergrid * ngc_buf1, "pppm:gc_buf1"); memory->create(gc_buf2, npergrid * ngc_buf2, "pppm:gc_buf2"); } /* ---------------------------------------------------------------------- set global size of PPPM grid = nx,ny,nz_pppm used for charge accumulation, FFTs, and electric field interpolation ------------------------------------------------------------------------- */ void PPPMElectrode::set_grid_global() { // use xprd,yprd,zprd (even if triclinic, and then scale later) // adjust z dimension for 2d slab PPPM // 3d PPPM just uses zprd since slab_volfactor = 1.0 double xprd = domain->xprd; double yprd = domain->yprd; double zprd = domain->zprd; double xprd_wire = xprd * wire_volfactor; double yprd_wire = yprd * wire_volfactor; double zprd_slab = zprd * slab_volfactor; // make initial g_ewald estimate // based on desired accuracy and real space cutoff // fluid-occupied volume used to estimate real-space error // zprd used rather than zprd_slab double h; bigint natoms = atom->natoms; if (!gewaldflag) { if (accuracy <= 0.0) error->all(FLERR, "KSpace accuracy must be > 0"); if (q2 == 0.0) error->all(FLERR, "Must use kspace_modify gewald for uncharged system"); g_ewald = accuracy * sqrt(natoms * cutoff * xprd * yprd * zprd) / (2.0 * q2); if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15 * log(accuracy)) / cutoff; else g_ewald = sqrt(-log(g_ewald)) / cutoff; } // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy // nz_pppm uses extended zprd_slab instead of zprd // reduce it until accuracy target is met if (!gridflag) { if (differentiation_flag == 1 || stagger_flag) { h = h_x = h_y = h_z = 4.0 / g_ewald; int count = 0; while (true) { // set grid dimensions nx_pppm = static_cast(xprd_wire / h_x); ny_pppm = static_cast(yprd_wire / h_y); nz_pppm = static_cast(zprd_slab / h_z); if (nx_pppm <= 1) nx_pppm = 2; if (ny_pppm <= 1) ny_pppm = 2; if (nz_pppm <= 1) nz_pppm = 2; // estimate KSpace force error double df_kspace = compute_df_kspace(); // break loop if the accuracy has been reached or // too many loops have been performed count++; if (df_kspace <= accuracy) break; if (count > 500) error->all(FLERR, "Could not compute grid size"); h *= 0.95; h_x = h_y = h_z = h; } } else { double err; h_x = h_y = h_z = 1.0 / g_ewald; nx_pppm = static_cast(xprd_wire / h_x) + 1; ny_pppm = static_cast(yprd_wire / h_y) + 1; nz_pppm = static_cast(zprd_slab / h_z) + 1; err = estimate_ik_error(h_x, xprd_wire, natoms); while (err > accuracy) { err = estimate_ik_error(h_x, xprd_wire, natoms); nx_pppm++; h_x = xprd_wire / nx_pppm; } err = estimate_ik_error(h_y, yprd_wire, natoms); while (err > accuracy) { err = estimate_ik_error(h_y, yprd_wire, natoms); ny_pppm++; h_y = yprd_wire / ny_pppm; } err = estimate_ik_error(h_z, zprd_slab, natoms); while (err > accuracy) { err = estimate_ik_error(h_z, zprd_slab, natoms); nz_pppm++; h_z = zprd_slab / nz_pppm; } } } // boost grid size until it is factorable while (!factorable(nx_pppm)) nx_pppm++; while (!factorable(ny_pppm)) ny_pppm++; while (!factorable(nz_pppm)) nz_pppm++; h_x = xprd_wire / nx_pppm; h_y = yprd_wire / ny_pppm; h_z = zprd_slab / nz_pppm; if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) error->all(FLERR, "PPPM/electrode grid is too large"); } /* ---------------------------------------------------------------------- compute estimated kspace force error ------------------------------------------------------------------------- */ double PPPMElectrode::compute_df_kspace() { double xprd = domain->xprd; double yprd = domain->yprd; double zprd = domain->zprd; double xprd_wire = xprd * wire_volfactor; double yprd_wire = yprd * wire_volfactor; double zprd_slab = zprd * slab_volfactor; bigint natoms = atom->natoms; double df_kspace = 0.0; if (differentiation_flag == 1 || stagger_flag) { double qopt = compute_qopt(); df_kspace = sqrt(qopt / natoms) * q2 / (xprd_wire * yprd_wire * zprd_slab); } else { double lprx = estimate_ik_error(h_x, xprd_wire, natoms); double lpry = estimate_ik_error(h_y, yprd_wire, natoms); double lprz = estimate_ik_error(h_z, zprd_slab, natoms); df_kspace = sqrt(lprx * lprx + lpry * lpry + lprz * lprz) / sqrt(3.0); } return df_kspace; } /* ---------------------------------------------------------------------- compute qopt ------------------------------------------------------------------------- */ double PPPMElectrode::compute_qopt() { int k, l, m, nx, ny, nz; double argx, argy, argz, wx, wy, wz, sx, sy, sz, qx, qy, qz; double u1, u2, sqk; double sum1, sum2, sum3, sum4, dot2; double *prd = domain->prd; const double xprd = prd[0]; const double yprd = prd[1]; const double zprd = prd[2]; const double xprd_wire = xprd * wire_volfactor; const double yprd_wire = yprd * wire_volfactor; const double zprd_slab = zprd * slab_volfactor; volume = xprd_wire * yprd_wire * zprd_slab; const double unitkx = (MY_2PI / xprd_wire); const double unitky = (MY_2PI / yprd_wire); const double unitkz = (MY_2PI / zprd_slab); const int twoorder = 2 * order; // loop over entire FFT grid // each proc calculates contributions from every Pth grid point bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; int nxy_pppm = nx_pppm * ny_pppm; double qopt = 0.0; for (bigint i = me; i < ngridtotal; i += nprocs) { k = i % nx_pppm; l = (i / nx_pppm) % ny_pppm; m = i / nxy_pppm; const int kper = k - nx_pppm * (2 * k / nx_pppm); const int lper = l - ny_pppm * (2 * l / ny_pppm); const int mper = m - nz_pppm * (2 * m / nz_pppm); sqk = square(unitkx * kper) + square(unitky * lper) + square(unitkz * mper); if (sqk == 0.0) continue; sum1 = sum2 = sum3 = sum4 = 0.0; for (nx = -2; nx <= 2; nx++) { qx = unitkx * (kper + nx_pppm * nx); sx = exp(-0.25 * square(qx / g_ewald)); argx = 0.5 * qx * xprd_wire / nx_pppm; wx = powsinxx(argx, twoorder); qx *= qx; for (ny = -2; ny <= 2; ny++) { qy = unitky * (lper + ny_pppm * ny); sy = exp(-0.25 * square(qy / g_ewald)); argy = 0.5 * qy * yprd_wire / ny_pppm; wy = powsinxx(argy, twoorder); qy *= qy; for (nz = -2; nz <= 2; nz++) { qz = unitkz * (mper + nz_pppm * nz); sz = exp(-0.25 * square(qz / g_ewald)); argz = 0.5 * qz * zprd_slab / nz_pppm; wz = powsinxx(argz, twoorder); qz *= qz; dot2 = qx + qy + qz; u1 = sx * sy * sz; u2 = wx * wy * wz; sum1 += u1 * u1 / dot2 * MY_4PI * MY_4PI; sum2 += u1 * u2 * MY_4PI; sum3 += u2; sum4 += dot2 * u2; } } } sum2 *= sum2; qopt += sum1 - sum2 / (sum3 * sum4); } // sum qopt over all procs double qopt_all; MPI_Allreduce(&qopt, &qopt_all, 1, MPI_DOUBLE, MPI_SUM, world); return qopt_all; } /* ---------------------------------------------------------------------- set local subset of PPPM/FFT grid that I own n xyz lo/hi in = 3d brick that I own (inclusive) n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) ------------------------------------------------------------------------- */ void PPPMElectrode::set_grid_local() { // global indices of PPPM grid range from 0 to N-1 // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of // global PPPM grid that I own without ghost cells // for slab PPPM, assign z grid as if it were not extended // both non-tiled and tiled proc layouts use 0-1 fractional sumdomain // info if (comm->layout != Comm::LAYOUT_TILED) { nxlo_in = static_cast(comm->xsplit[comm->myloc[0]] * nx_pppm / wire_volfactor); nxhi_in = static_cast(comm->xsplit[comm->myloc[0] + 1] * nx_pppm / wire_volfactor) - 1; nylo_in = static_cast(comm->ysplit[comm->myloc[1]] * ny_pppm / wire_volfactor); nyhi_in = static_cast(comm->ysplit[comm->myloc[1] + 1] * ny_pppm / wire_volfactor) - 1; nzlo_in = static_cast(comm->zsplit[comm->myloc[2]] * nz_pppm / slab_volfactor); nzhi_in = static_cast(comm->zsplit[comm->myloc[2] + 1] * nz_pppm / slab_volfactor) - 1; } else { nxlo_in = static_cast(comm->mysplit[0][0] * nx_pppm / wire_volfactor); nxhi_in = static_cast(comm->mysplit[0][1] * nx_pppm / wire_volfactor) - 1; nylo_in = static_cast(comm->mysplit[1][0] * ny_pppm / wire_volfactor); nyhi_in = static_cast(comm->mysplit[1][1] * ny_pppm / wire_volfactor) - 1; nzlo_in = static_cast(comm->mysplit[2][0] * nz_pppm / slab_volfactor); nzhi_in = static_cast(comm->mysplit[2][1] * nz_pppm / slab_volfactor) - 1; } // nlower,nupper = stencil size for mapping particles to PPPM grid nlower = -(order - 1) / 2; nupper = order / 2; // shift values for particle <-> grid mapping // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 if (order % 2) shift = OFFSET + 0.5; else shift = OFFSET; if (order % 2) shiftone = 0.0; else shiftone = 0.5; // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of // global PPPM grid that my particles can contribute charge to // effectively nlo_in,nhi_in + ghost cells // nlo,nhi = global coords of grid pt to "lower left" of // smallest/largest // position a particle in my box can be at // dist[3] = particle position bound = subbox + skin/2.0 + qdist // qdist = offset due to TIP4P fictitious charge // convert to triclinic if necessary // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping // for slab PPPM, assign z grid as if it were not extended double *prd, *sublo, *subhi; prd = domain->prd; boxlo = domain->boxlo; sublo = domain->sublo; subhi = domain->subhi; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double xprd_wire = xprd * wire_volfactor; double yprd_wire = yprd * wire_volfactor; double zprd_slab = zprd * slab_volfactor; double dist[3] = {0.0, 0.0, 0.0}; double cuthalf = 0.5 * neighbor->skin + qdist; dist[0] = dist[1] = dist[2] = cuthalf; int nlo, nhi; nlo = nhi = 0; nlo = static_cast((sublo[0] - dist[0] - boxlo[0]) * nx_pppm / xprd_wire + shift) - OFFSET; nhi = static_cast((subhi[0] + dist[0] - boxlo[0]) * nx_pppm / xprd_wire + shift) - OFFSET; nxlo_out = nlo + nlower; nxhi_out = nhi + nupper; nlo = static_cast((sublo[1] - dist[1] - boxlo[1]) * ny_pppm / yprd_wire + shift) - OFFSET; nhi = static_cast((subhi[1] + dist[1] - boxlo[1]) * ny_pppm / yprd_wire + shift) - OFFSET; nylo_out = nlo + nlower; nyhi_out = nhi + nupper; nlo = static_cast((sublo[2] - dist[2] - boxlo[2]) * nz_pppm / zprd_slab + shift) - OFFSET; nhi = static_cast((subhi[2] + dist[2] - boxlo[2]) * nz_pppm / zprd_slab + shift) - OFFSET; nzlo_out = nlo + nlower; nzhi_out = nhi + nupper; if (stagger_flag) { nxhi_out++; nyhi_out++; nzhi_out++; } // for slab PPPM, change the grid boundary for processors at +z end // to include the empty volume between periodically repeating slabs // for slab PPPM, want charge data communicated from -z proc to +z proc, // but not vice versa, also want field data communicated from +z proc // to -z proc, but not vice versa // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) // also insure no other procs use ghost cells beyond +z limit // differnet logic for non-tiled vs tiled decomposition if (slabflag == 1) { if (comm->layout != Comm::LAYOUT_TILED) { if (comm->myloc[2] == comm->procgrid[2] - 1) nzhi_in = nzhi_out = nz_pppm - 1; } else { if (comm->mysplit[2][1] == 1.0) nzhi_in = nzhi_out = nz_pppm - 1; } nzhi_out = MIN(nzhi_out, nz_pppm - 1); } if (wireflag == 1) { if (comm->layout != Comm::LAYOUT_TILED) { if (comm->myloc[0] == comm->procgrid[0] - 1) nxhi_in = nxhi_out = nx_pppm - 1; if (comm->myloc[1] == comm->procgrid[1] - 1) nyhi_in = nyhi_out = ny_pppm - 1; } else { if (comm->mysplit[0][1] == 1.0) nxhi_in = nxhi_out = nx_pppm - 1; if (comm->mysplit[1][1] == 1.0) nyhi_in = nyhi_out = ny_pppm - 1; } nxhi_out = MIN(nxhi_out, nx_pppm - 1); nyhi_out = MIN(nyhi_out, ny_pppm - 1); } // x-pencil decomposition of FFT mesh // global indices range from 0 to N-1 // each proc owns entire x-dimension, clumps of columns in y,z // dimensions npey_fft,npez_fft = # of procs in y,z dims if nprocs is // small enough, proc can own 1 or more entire xy planes, // else proc owns 2d sub-blocks of yz plane // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions // nlo_fft,nhi_fft = lower/upper limit of the section // of the global FFT mesh that I own in x-pencil decomposition int npey_fft, npez_fft; if (nz_pppm >= nprocs) { npey_fft = 1; npez_fft = nprocs; } else procs2grid2d(nprocs, ny_pppm, nz_pppm, &npey_fft, &npez_fft); int me_y = me % npey_fft; int me_z = me / npey_fft; nxlo_fft = 0; nxhi_fft = nx_pppm - 1; nylo_fft = me_y * ny_pppm / npey_fft; nyhi_fft = (me_y + 1) * ny_pppm / npey_fft - 1; nzlo_fft = me_z * nz_pppm / npez_fft; nzhi_fft = (me_z + 1) * nz_pppm / npez_fft - 1; // ngrid = count of PPPM grid pts owned by this proc, including ghosts ngrid = (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1) * (nzhi_out - nzlo_out + 1); // count of FFT grids pts owned by this proc, without ghosts // nfft = FFT points in x-pencil FFT decomposition on this proc // nfft_brick = FFT points in 3d brick-decomposition on this proc // nfft_both = greater of 2 values nfft = (nxhi_fft - nxlo_fft + 1) * (nyhi_fft - nylo_fft + 1) * (nzhi_fft - nzlo_fft + 1); int nfft_brick = (nxhi_in - nxlo_in + 1) * (nyhi_in - nylo_in + 1) * (nzhi_in - nzlo_in + 1); nfft_both = MAX(nfft, nfft_brick); } /* ---------------------------------------------------------------------- pre-compute modified (Hockney-Eastwood) Coulomb Green's function ------------------------------------------------------------------------- */ void PPPMElectrode::compute_gf_ik() { const double *const prd = domain->prd; const double xprd = prd[0]; const double yprd = prd[1]; const double zprd = prd[2]; const double xprd_wire = xprd * wire_volfactor; const double yprd_wire = yprd * wire_volfactor; const double zprd_slab = zprd * slab_volfactor; const double unitkx = (MY_2PI / xprd_wire); const double unitky = (MY_2PI / yprd_wire); const double unitkz = (MY_2PI / zprd_slab); double snx, sny, snz; double argx, argy, argz, wx, wy, wz, sx, sy, sz, qx, qy, qz; double sum1, dot1, dot2; double numerator, denominator; double sqk; int k, l, m, n, nx, ny, nz, kper, lper, mper; const int nbx = static_cast((g_ewald * xprd_wire / (MY_PI * nx_pppm)) * pow(-log(EPS_HOC), 0.25)); const int nby = static_cast((g_ewald * yprd_wire / (MY_PI * ny_pppm)) * pow(-log(EPS_HOC), 0.25)); const int nbz = static_cast((g_ewald * zprd_slab / (MY_PI * nz_pppm)) * pow(-log(EPS_HOC), 0.25)); const int twoorder = 2 * order; n = 0; for (m = nzlo_fft; m <= nzhi_fft; m++) { mper = m - nz_pppm * (2 * m / nz_pppm); snz = square(sin(0.5 * unitkz * mper * zprd_slab / nz_pppm)); for (l = nylo_fft; l <= nyhi_fft; l++) { lper = l - ny_pppm * (2 * l / ny_pppm); sny = square(sin(0.5 * unitky * lper * yprd_wire / ny_pppm)); for (k = nxlo_fft; k <= nxhi_fft; k++) { kper = k - nx_pppm * (2 * k / nx_pppm); snx = square(sin(0.5 * unitkx * kper * xprd_wire / nx_pppm)); sqk = square(unitkx * kper) + square(unitky * lper) + square(unitkz * mper); if (sqk != 0.0) { numerator = 12.5663706 / sqk; // 4 pi / k^2 denominator = gf_denom(snx, sny, snz); sum1 = 0.0; for (nx = -nbx; nx <= nbx; nx++) { qx = unitkx * (kper + nx_pppm * nx); sx = exp(-0.25 * square(qx / g_ewald)); argx = 0.5 * qx * xprd_wire / nx_pppm; wx = powsinxx(argx, twoorder); for (ny = -nby; ny <= nby; ny++) { qy = unitky * (lper + ny_pppm * ny); sy = exp(-0.25 * square(qy / g_ewald)); argy = 0.5 * qy * yprd_wire / ny_pppm; wy = powsinxx(argy, twoorder); for (nz = -nbz; nz <= nbz; nz++) { qz = unitkz * (mper + nz_pppm * nz); sz = exp(-0.25 * square(qz / g_ewald)); argz = 0.5 * qz * zprd_slab / nz_pppm; wz = powsinxx(argz, twoorder); dot1 = unitkx * kper * qx + unitky * lper * qy + unitkz * mper * qz; dot2 = qx * qx + qy * qy + qz * qz; sum1 += (dot1 / dot2) * sx * sy * sz * wx * wy * wz; } } } greensfn[n++] = numerator * sum1 / denominator; } else greensfn[n++] = 0.0; } } } } /* ---------------------------------------------------------------------- compute optimized Green's function for energy calculation ------------------------------------------------------------------------- */ void PPPMElectrode::compute_gf_ad() { const double *const prd = domain->prd; const double xprd = prd[0]; const double yprd = prd[1]; const double zprd = prd[2]; const double xprd_wire = xprd * wire_volfactor; const double yprd_wire = yprd * wire_volfactor; const double zprd_slab = zprd * slab_volfactor; const double unitkx = (MY_2PI / xprd_wire); const double unitky = (MY_2PI / yprd_wire); const double unitkz = (MY_2PI / zprd_slab); double snx, sny, snz, sqk; double argx, argy, argz, wx, wy, wz, sx, sy, sz, qx, qy, qz; double numerator, denominator; int k, l, m, n, kper, lper, mper; const int twoorder = 2 * order; for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; n = 0; for (m = nzlo_fft; m <= nzhi_fft; m++) { mper = m - nz_pppm * (2 * m / nz_pppm); qz = unitkz * mper; snz = square(sin(0.5 * qz * zprd_slab / nz_pppm)); sz = exp(-0.25 * square(qz / g_ewald)); argz = 0.5 * qz * zprd_slab / nz_pppm; wz = powsinxx(argz, twoorder); for (l = nylo_fft; l <= nyhi_fft; l++) { lper = l - ny_pppm * (2 * l / ny_pppm); qy = unitky * lper; sny = square(sin(0.5 * qy * yprd_wire / ny_pppm)); sy = exp(-0.25 * square(qy / g_ewald)); argy = 0.5 * qy * yprd_wire / ny_pppm; wy = powsinxx(argy, twoorder); for (k = nxlo_fft; k <= nxhi_fft; k++) { kper = k - nx_pppm * (2 * k / nx_pppm); qx = unitkx * kper; snx = square(sin(0.5 * qx * xprd_wire / nx_pppm)); sx = exp(-0.25 * square(qx / g_ewald)); argx = 0.5 * qx * xprd_wire / nx_pppm; wx = powsinxx(argx, twoorder); sqk = qx * qx + qy * qy + qz * qz; if (sqk != 0.0) { numerator = MY_4PI / sqk; denominator = gf_denom(snx, sny, snz); greensfn[n] = numerator * sx * sy * sz * wx * wy * wz / denominator; sf_coeff[0] += sf_precoeff1[n] * greensfn[n]; sf_coeff[1] += sf_precoeff2[n] * greensfn[n]; sf_coeff[2] += sf_precoeff3[n] * greensfn[n]; sf_coeff[3] += sf_precoeff4[n] * greensfn[n]; sf_coeff[4] += sf_precoeff5[n] * greensfn[n]; sf_coeff[5] += sf_precoeff6[n] * greensfn[n]; n++; } else { greensfn[n] = 0.0; sf_coeff[0] += sf_precoeff1[n] * greensfn[n]; sf_coeff[1] += sf_precoeff2[n] * greensfn[n]; sf_coeff[2] += sf_precoeff3[n] * greensfn[n]; sf_coeff[3] += sf_precoeff4[n] * greensfn[n]; sf_coeff[4] += sf_precoeff5[n] * greensfn[n]; sf_coeff[5] += sf_precoeff6[n] * greensfn[n]; n++; } } } } // compute the coefficients for the self-force correction double prex, prey, prez; prex = prey = prez = MY_PI / volume; prex *= nx_pppm / xprd_wire; prey *= ny_pppm / yprd_wire; prez *= nz_pppm / zprd_slab; sf_coeff[0] *= prex; sf_coeff[1] *= prex * 2; sf_coeff[2] *= prey; sf_coeff[3] *= prey * 2; sf_coeff[4] *= prez; sf_coeff[5] *= prez * 2; // communicate values with other procs double tmp[6]; MPI_Allreduce(sf_coeff, tmp, 6, MPI_DOUBLE, MPI_SUM, world); for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; } /* ---------------------------------------------------------------------- create discretized "density" of a set of particles (c.f. make_rho()) in a specified scratch space. density(x,y,z) = charge "density" at grid points of my 3d brick (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) in global grid ------------------------------------------------------------------------- */ void PPPMElectrode::make_rho_in_brick(int source_grpbit, FFT_SCALAR ***scratch_brick, bool invert_source) { int l, m, n, nx, ny, nz, mx, my, mz; FFT_SCALAR dx, dy, dz, x0, y0, z0; last_source_grpbit = source_grpbit; last_invert_source = invert_source; // clear 3d density array memset(&(scratch_brick[nzlo_out][nylo_out][nxlo_out]), 0, ngrid * sizeof(FFT_SCALAR)); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt double *q = atom->q; double **x = atom->x; int *mask = atom->mask; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { bool const i_in_source = !!(mask[i] & source_grpbit) != invert_source; if (!i_in_source) continue; nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx + shiftone - (x[i][0] - boxlo[0]) * delxinv; dy = ny + shiftone - (x[i][1] - boxlo[1]) * delyinv; dz = nz + shiftone - (x[i][2] - boxlo[2]) * delzinv; compute_rho1d(dx, dy, dz); z0 = delvolinv * q[i]; for (n = nlower; n <= nupper; n++) { mz = n + nz; y0 = z0 * rho1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m + ny; x0 = y0 * rho1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l + nx; scratch_brick[mz][my][mx] += x0 * rho1d[0][l]; } } } } } /* ---------------------------------------------------------------------- group-group interactions ------------------------------------------------------------------------- */ void PPPMElectrode::compute_group_group(int /*groupbit_A*/, int /*groupbit_B*/, int /*AA_flag*/) { error->all(FLERR, "group group interaction not implemented in pppm/electrode yet"); } void PPPMElectrode::compute_matrix_corr(bigint *imat, double **matrix) { boundcorr->matrix_corr(imat, matrix); } /* ---------------------------------------------------------------------- compute b-vector EW3DC correction of constant potential approach ------------------------------------------------------------------------- */ void PPPMElectrode::compute_vector_corr(double *vec, int sensor_grpbit, int source_grpbit, bool invert_source) { boundcorr->vector_corr(vec, sensor_grpbit, source_grpbit, invert_source); }