diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index 66926721cb..da5c6f0c3c 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -1922,6 +1922,185 @@ __kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1, } } + +/* ---------------------------------------------------------------------- + fphi_mpole = multipole potential from grid + fphi_mpole extracts the permanent multipole potential from + the particle mesh Ewald grid +------------------------------------------------------------------------- */ + +__kernel void k_amoeba_fphi_mpole(const __global numtyp4 *restrict thetai1, + const __global numtyp4 *restrict thetai2, + const __global numtyp4 *restrict thetai3, + const __global int *restrict igrid, + const __global numtyp *restrict grid, + __global numtyp *restrict fphi, + const int bsorder, const int inum, + const int nzlo_out, const int nylo_out, + const int nxlo_out, const int ngridxy, + const int ngridx) +{ + int tid=THREAD_ID_X; + int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X; + + if (ii _nfft_max) { + memory->destroy(_moduli_bsarray); + _nfft_max = maxfft; + memory->create(_moduli_bsarray,_nfft_max,"amoeba:_moduli_bsarray"); + } + // compute and load the moduli values double x = 0.0; - bspline(x,bsorder,array); + //bspline(x,bsorder,array); + bspline(x,bsorder,_moduli_array); - for (i = 0; i < maxfft; i++) bsarray[i] = 0.0; - for (i = 0; i < bsorder; i++) bsarray[i+1] = array[i]; + for (i = 0; i < maxfft; i++) _moduli_bsarray[i] = 0.0; + for (i = 0; i < bsorder; i++) _moduli_bsarray[i+1] = _moduli_array[i]; - dftmod(bsmod1,bsarray,nfft1,bsorder); - dftmod(bsmod2,bsarray,nfft2,bsorder); - dftmod(bsmod3,bsarray,nfft3,bsorder); + dftmod(bsmod1,_moduli_bsarray,nfft1,bsorder); + dftmod(bsmod2,_moduli_bsarray,nfft2,bsorder); + dftmod(bsmod3,_moduli_bsarray,nfft3,bsorder); // perform deallocation of local arrays - delete[] array; - delete[] bsarray; + //delete[] array; + //delete[] bsarray; } /* ---------------------------------------------------------------------- diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index 9890904e42..d301a86cdb 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -68,67 +68,71 @@ PairAmoeba::PairAmoeba(LAMMPS *lmp) : Pair(lmp) // force field settings nmax = 0; - xaxis2local = yaxis2local = zaxis2local = NULL; - rpole = NULL; - tq = NULL; + xaxis2local = yaxis2local = zaxis2local = nullptr; + rpole = nullptr; + tq = nullptr; - red2local = NULL; - xred = NULL; + red2local = nullptr; + xred = nullptr; - uind = uinp = udirp = NULL; - uopt = uoptp = NULL; - fopt = foptp = NULL; - field = fieldp = NULL; - ufld = dufld = NULL; - rsd = rsdp = NULL; - zrsd = zrsdp = NULL; + uind = uinp = udirp = nullptr; + uopt = uoptp = nullptr; + fopt = foptp = nullptr; + field = fieldp = nullptr; + ufld = dufld = nullptr; + rsd = rsdp = nullptr; + zrsd = zrsdp = nullptr; - cmp = fmp = NULL; - cphi = fphi = NULL; + cmp = fmp = nullptr; + cphi = fphi = nullptr; - poli = NULL; - conj = conjp = NULL; - vec = vecp = NULL; - udir = usum = usump = NULL; + _moduli_array = nullptr; + _moduli_bsarray = nullptr; + _nfft_max = 0; - fuind = fuinp = NULL; - fdip_phi1 = fdip_phi2 = fdip_sum_phi = NULL; - dipfield1 = dipfield2 = NULL; + poli = nullptr; + conj = conjp = nullptr; + vec = vecp = nullptr; + udir = usum = usump = nullptr; - fphid = fphip = NULL; - fphidp = cphidp = NULL; + fuind = fuinp = nullptr; + fdip_phi1 = fdip_phi2 = fdip_sum_phi = nullptr; + dipfield1 = dipfield2 = nullptr; + + fphid = fphip = nullptr; + fphidp = cphidp = nullptr; bsordermax = 0; - thetai1 = thetai2 = thetai3 = NULL; - bsmod1 = bsmod2 = bsmod3 = NULL; - bsbuild = NULL; - igrid = NULL; - m_kspace = p_kspace = pc_kspace = d_kspace = NULL; - i_kspace = ic_kspace = NULL; + thetai1 = thetai2 = thetai3 = nullptr; + bsmod1 = bsmod2 = bsmod3 = nullptr; + bsbuild = nullptr; + igrid = nullptr; + m_kspace = p_kspace = pc_kspace = d_kspace = nullptr; + i_kspace = ic_kspace = nullptr; - numneigh_dipole = NULL; - firstneigh_dipole = NULL; - firstneigh_dipdip = NULL; - ipage_dipole = NULL; - dpage_dipdip = NULL; + numneigh_dipole = nullptr; + firstneigh_dipole = nullptr; + firstneigh_dipdip = nullptr; + ipage_dipole = nullptr; + dpage_dipdip = nullptr; - numneigh_precond = NULL; - firstneigh_precond = NULL; - ipage_precond = NULL; + numneigh_precond = nullptr; + firstneigh_precond = nullptr; + ipage_precond = nullptr; - firstneigh_pcpc = NULL; - dpage_pcpc = NULL; + firstneigh_pcpc = nullptr; + dpage_pcpc = nullptr; - qfac = NULL; - gridfft1 = NULL; + qfac = nullptr; + gridfft1 = nullptr; initialize_type_class(); initialize_vdwl(); initialize_smallsize(); - forcefield = NULL; + forcefield = nullptr; - id_pole = id_udalt = id_upalt = NULL; + id_pole = id_udalt = id_upalt = nullptr; nualt = 0; first_flag = 1; @@ -220,6 +224,9 @@ PairAmoeba::~PairAmoeba() memory->destroy(fphidp); memory->destroy(cphidp); + memory->destroy(_moduli_array); + memory->destroy(_moduli_bsarray); + memory->destroy(thetai1); memory->destroy(thetai2); memory->destroy(thetai3); @@ -2312,6 +2319,8 @@ void PairAmoeba::grow_local() firstneigh_pcpc = (double **) memory->smalloc(nmax*sizeof(double *),"induce:firstneigh_pcpc"); } + + memory->create(_moduli_array,bsordermax,"amoeba:_moduli_array"); } /* ---------------------------------------------------------------------- diff --git a/src/AMOEBA/pair_amoeba.h b/src/AMOEBA/pair_amoeba.h index 24ce6fcfbc..91ec8faf0c 100644 --- a/src/AMOEBA/pair_amoeba.h +++ b/src/AMOEBA/pair_amoeba.h @@ -337,7 +337,11 @@ class PairAmoeba : public Pair { double *gridfft1; // copy of p_kspace FFT grid double **cmp,**fmp; // Cartesian and fractional multipoles - double **cphi,**fphi; + double **cphi,**fphi; + + double *_moduli_array; // buffers for moduli + double *_moduli_bsarray; + int _nfft_max; // params for current KSpace solve and FFT being worked on @@ -347,8 +351,12 @@ class PairAmoeba : public Pair { double ctf[10][10]; // indices NOT flipped vs Fortran double ftc[10][10]; // indices NOT flipped vs Fortran - class AmoebaConvolution *m_kspace,*p_kspace,*pc_kspace,*d_kspace; - class AmoebaConvolution *i_kspace,*ic_kspace; + class AmoebaConvolution *m_kspace; // multipole KSpace + class AmoebaConvolution *p_kspace; // polar KSpace + class AmoebaConvolution *pc_kspace; + class AmoebaConvolution *d_kspace; // dispersion KSpace + class AmoebaConvolution *i_kspace; // induce KSpace + class AmoebaConvolution *ic_kspace; // FFT grid size factors