diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index fa0670a757..534ab24085 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Trung Nguyen (Northwestern) + Contributing author: Trung Nguyen (Northwestern/UChicago) ------------------------------------------------------------------------- */ #include "pair_amoeba_gpu.h" @@ -486,8 +486,6 @@ void PairAmoebaGPU::induce() comm->reverse_comm(this); } - //error->all(FLERR,"STOP GPU"); - // set initial conjugate gradient residual and conjugate vector for (i = 0; i < nlocal; i++) { @@ -547,8 +545,6 @@ void PairAmoebaGPU::induce() comm->reverse_comm(this); } - //error->all(FLERR,"STOP"); - for (i = 0; i < nlocal; i++) { for (j = 0; j < 3; j++) { uind[i][j] = vec[i][j]; @@ -1751,166 +1747,6 @@ void PairAmoebaGPU::polar_kspace() } } - // account for dipole response terms in the TCG method - - /* - if (poltyp == TCG) { - - for (m = 0; m < tcgnab; m++) { - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - fuind[i][j] = a[0][j]*uad[i][m][0] + a[1][j]*uad[i][m][1] + - a[2][j]*uad[i][m][2]; - fuinp[i][j] = a[0][j]*ubp[i][m][0] + a[1][j]*ubp[i][m][1] + - a[2][j]*ubp[i][m][2]; - } - } - - grid_uind(fuind,fuinp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - for (k = 0; k < nfft3; k++) { - for (j = 0; j < nfft2; j++) { - for (i = 0; i < nfft1; i++) { - term = qfac[k][j][i]; - qgrid[k][j][i][0] *= term; - qgrid[k][j][i][1] *= term; - } - } - } - - efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1); - fphi_uind(fphid,fphip,fphidp); - - for (i = 0; i < nlocal; i++) { - for (j = 1; j < 10; j++) { - fphid[i][j] *= felec; - fphip[i][j] *= felec; - } - } - - for (i = 0; i < nlocal; i++) { - f1 = 0.0; - f2 = 0.0; - f3 = 0.0; - for (k = 0; k < 3; k++) { - j1 = deriv1[k+1]; - j2 = deriv2[k+1]; - j3 = deriv3[k+1]; - f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1]; - f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2]; - f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3]; - } - - f1 *= 0.5 * nfft1; - f2 *= 0.5 * nfft2; - f3 *= 0.5 * nfft3; - h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3; - h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3; - h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3; - f[i][0] -= h1; - f[i][1] -= h2; - f[i][2] -= h3; - - for (j = 1; j < 4; j++) { - cphid[j] = 0.0; - cphip[j] = 0.0; - for (k = 1; k < 4; k++) { - cphid[j] += ftc[j][k]*fphid[i][k]; - cphip[j] += ftc[j][k]*fphip[i][k]; - } - } - - vxx -= 0.5*(cphid[1]*ubp[i][m][0] + cphip[1]*uad[i][m][0]); - vyy -= 0.5*(cphid[2]*ubp[i][m][1] + cphip[2]*uad[i][m][1]); - vzz -= 0.5*(cphid[3]*ubp[i][m][2] + cphip[3]*uad[i][m][2]); - - vxy -= 0.25*(cphid[1]*ubp[i][m][1] + cphip[1]*uad[i][m][1] + - cphid[2]*ubp[i][m][0] + cphip[2]*uad[i][m][0]); - vyz -= 0.25*(cphid[1]*ubp[i][m][2] + cphip[1]*uad[i][m][2] + - cphid[3]*ubp[i][m][0] + cphip[3]*uad[i][m][0]); - vxz -= 0.25*(cphid[2]*ubp[i][m][2] + cphip[2]*uad[i][m][2] + - cphid[3]*ubp[i][m][1] + cphip[3]*uad[i][m][1]); - } - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - fuind[i][j] = a[0][j]*ubd[i][m][0] + a[1][j]*ubd[i][m][1] + - a[2][j]*ubd[i][m][2]; - fuinp[i][j] = a[0][j]*uap[i][m][0] + a[1][j]*uap[i][m][1] + - a[2][j]*uap[i][m][2]; - } - } - - grid_uind(fuind,fuinp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - for (k = 0; k < nfft3; k++) { - for (j = 0; j < nfft2; j++) { - for (i = 0; i < nfft1; i++) { - term = qfac[k][j][i]; - qgrid[k][j][i][0] *= term; - qgrid[k][j][i][1] *= term; - } - } - } - - efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1); - fphi_uind(fphid,fphip,fphidp); - - for (i = 0; i < nlocal; i++) { - for (j = 1; j < 10; j++) { - fphid[i][j] *= felec; - fphip[i][j] *= felec; - } - } - - for (i = 0; i < nlocal; i++) { - f1 = 0.0; - f2 = 0.0; - f3 = 0.0; - for (k = 0; k < 3; k++) { - j1 = deriv1[k+1]; - j2 = deriv2[k+1]; - j3 = deriv3[k+1]; - f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1]; - f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2]; - f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3]; - } - - f1 *= 0.5 * nfft1; - f2 *= 0.5 * nfft2; - f3 *= 0.5 * nfft3; - h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3; // matvec - h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3; - h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3; - f[i][0] -= h1; - f[i][1] -= h2; - f[i][2] -= h3; - - for (j = 1; j < 4; j++) { - cphid[j] = 0.0; - cphip[j] = 0.0; - for (k = 1; k < 4; k++) { - cphid[j] += ftc[j][k]*fphid[i][k]; - cphip[j] += ftc[j][k]*fphip[i][k]; - } - } - - vxx -= 0.5*(cphid[1]*uap[i][m][0] + cphip[1]*ubd[i][m][0]); - vyy -= 0.5*(cphid[2]*uap[i][m][1] + cphip[2]*ubd[i][m][1]); - vzz -= 0.5*(cphid[3]*uap[i][m][2] + cphip[3]*ubd[i][m][2]); - vxy -= 0.25*(cphid[1]*uap[i][m][1] + cphip[1]*ubd[i][m][1] + - cphid[2]*uap[i][m][0] + cphip[2]*ubd[i][m][0]); - vxz -= 0.25*(cphid[1]*uap[i][m][2] + cphip[1]*ubd[i][m][2] + - cphid[3]*uap[i][m][0] + cphip[3]*ubd[i][m][0]); - vyz -= 0.25*(cphid[2]*uap[i][m][2] + cphip[2]*ubd[i][m][2] + - cphid[3]*uap[i][m][1] + cphip[3]*ubd[i][m][1]); - } - } - } - */ - // assign permanent and induced multipoles to the PME grid for (i = 0; i < nlocal; i++) { @@ -2097,142 +1933,6 @@ void PairAmoebaGPU::polar_kspace() } } - // add back missing terms for the TCG polarization method; - // first do the term for "UAD" dotted with "UBP" - - /* - if (poltyp == TCG) { - - for (m = 0; m < tcgnab; m++) { - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 10; j++) - cmp[i][j] = 0.0; - for (j = 1; j < 4; j++) - cmp[i][j] = ubp[i][m][j-1]; - } - - cmp_to_fmp(cmp,fmp); - grid_mpole(fmp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - for (k = 0; k < nfft3; k++) { - for (j = 0; j < nfft2; j++) { - for (i = 0; i < nfft1; i++) { - qgrip[k][j][i][0] = qgrid[k][j][i][0]; - qgrip[k][j][i][1] = qgrid[k][j][i][1]; - } - } - } - - for (i = 0; i < nlocal; i++) { - for (j = 1; j < 4; j++) - cmp[i][j] = uad[i][m][j-1]; - } - - cmp_to_fmp(cmp,fmp); - grid_mpole(fmp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - // make the scalar summation over reciprocal lattice - // NOTE: this loop has to be distributed for parallel - // NOTE: why does this one include m = 0 ? - - for (m = 1; m < ntot; m++) { - k1 = m % nfft1; - k2 = (m % nff) / nfft1; - k3 = m/nff; - r1 = (k1 >= nf1) ? k1-nfft1 : k1; - r2 = (k2 >= nf2) ? k2-nfft2 : k2; - r3 = (k3 >= nf3) ? k3-nfft3 : k3; - h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3; - h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3; - h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3; - hsq = h1*h1 + h2*h2 + h3*h3; - term = -pterm * hsq; - expterm = 0.0; - if (term > -50.0 && hsq != 0.0) { - denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3]; - expterm = exp(term) / denom; - struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] + - qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1]; - eterm = 0.5 * felec * expterm * struc2; - vterm = (2.0/hsq) * (1.0-term) * eterm; - virpolar[0] -= h1*h1*vterm - eterm; - virpolar[1] -= h2*h2*vterm - eterm; - virpolar[2] -= h3*h3*vterm - eterm; - virpolar[3] -= h1*h2*vterm; - virpolar[4] -= h1*h3*vterm; - virpolar[5] -= h2*h3*vterm; - } - } - - // now do the TCG terms with "UBD" dotted with "UAP" - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 10; j++) - cmp[i][j] = 0.0; - for (j = 1; j < 4; j++) - cmp[i][j] = uap[i][m][j-1]; - } - - cmp_to_fmp(cmp,fmp); - grid_mpole(fmp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - for (k = 0; k < nfft3; k++) { - for (j = 0; j < nfft2; j++) { - for (i = 0; i < nfft1; i++) { - qgrip[k][j][i][0] = qgrid[k][j][i][0]; - qgrip[k][j][i][1] = qgrid[k][j][i][1]; - } - } - } - - for (i = 0; i < nlocal; i++) { - for (j = 1; j < 4; j++) - cmp[i][j] = ubd[i][m][j-1]; - } - - cmp_to_fmp(cmp,fmp); - grid_mpole(fmp); - efft->compute(qgrid[0][0][0],qgrid[0][0][0],1); - - // make the scalar summation over reciprocal lattice - // NOTE: this loop has to be distributed for parallel - // NOTE: why does this one include m = 0 ? - - for (m = 1; m < ntot; m++) { - k1 = m % nfft1; - k2 = (m % nff) / nfft1; - k3 = m/nff; - r1 = (k1 >= nf1) ? k1-nfft1 : k1; - r2 = (k2 >= nf2) ? k2-nfft2 : k2; - r3 = (k3 >= nf3) ? k3-nfft3 : k3; - h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3; - h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3; - h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3; - hsq = h1*h1 + h2*h2 + h3*h3; - term = -pterm * hsq; - expterm = 0.0; - if (term > -50.0 && hsq != 0.0) { - denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3]; - expterm = exp(term) / denom; - struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] + - qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1]; - eterm = 0.5 * felec * expterm * struc2; - vterm = (2.0/hsq) * (1.0-term) * eterm; - virpolar[0] -= h1*h1*vterm - eterm; - virpolar[1] -= h2*h2*vterm - eterm; - virpolar[2] -= h3*h3*vterm - eterm; - virpolar[3] -= h1*h2*vterm; - virpolar[4] -= h1*h3*vterm; - virpolar[5] -= h2*h3*vterm; - } - } - } - } - */ - // increment the total internal virial tensor components if (vflag_global) { diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 915c67e512..61c30c0ad1 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Trung Nguyen (Northwestern) + Contributing author: Trung Nguyen (Northwestern/UChicago) ------------------------------------------------------------------------- */ #include "pair_hippo_gpu.h" @@ -208,14 +208,14 @@ void PairHippoGPU::init_style() int tq_size; int mnf = 5e-2 * neighbor->oneatom; int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass, - pdamp, thole, dirdamp, amtype2class, - special_repel, special_disp, special_mpole, - special_polar_wscale, special_polar_piscale, - special_polar_pscale, sizpr, dmppr, elepr, - csix, adisp, pcore, palpha, - atom->nlocal, atom->nlocal+atom->nghost, mnf, - maxspecial, maxspecial15, cell_size, gpu_mode, - screen, polar_dscale, polar_uscale, tq_size); + pdamp, thole, dirdamp, amtype2class, + special_repel, special_disp, special_mpole, + special_polar_wscale, special_polar_piscale, + special_polar_pscale, sizpr, dmppr, elepr, + csix, adisp, pcore, palpha, + atom->nlocal, atom->nlocal+atom->nghost, mnf, + maxspecial, maxspecial15, cell_size, gpu_mode, + screen, polar_dscale, polar_uscale, tq_size); GPU_EXTRA::check_flag(success,error,world); if (gpu_mode == GPU_FORCE) @@ -271,14 +271,14 @@ void PairHippoGPU::repulsion() inum = atom->nlocal; firstneigh = hippo_gpu_precompute(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - nullptr, nullptr, nullptr, - sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, domain->prd); + atom->type, amtype, amgroup, rpole, + nullptr, nullptr, nullptr, + sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, domain->boxlo, domain->prd); // select the correct cutoff for the term @@ -480,14 +480,6 @@ void PairHippoGPU::induce() } } } -/* - printf("GPU: cutghost = %f\n", comm->cutghost[0]); - for (i = 0; i < 10; i++) { - printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n", - i, udir[i][0], udir[i][1], udir[i][2], - udirp[i][0], udirp[i][1], udirp[i][2]); - } -*/ // allocate memory and make early host-device transfers // must be done before the first ufield0c @@ -611,8 +603,6 @@ void PairHippoGPU::induce() comm->reverse_comm(this); } - //error->all(FLERR,"STOP GPU"); - // set initial conjugate gradient residual and conjugate vector for (i = 0; i < nlocal; i++) { @@ -1022,7 +1012,7 @@ void PairHippoGPU::udirect2b_cpu() tdipdip[ndip++] = bcn[1]*yr*zr; tdipdip[ndip++] = -bcn[0] + bcn[1]*zr*zr; } else { - if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j); + } } // jj @@ -1055,16 +1045,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp) memset(&field[0][0], 0, 3*nall *sizeof(double)); memset(&fieldp[0][0], 0, 3*nall *sizeof(double)); - -/* - for (int i = 0; i < nall; i++) { - for (int j = 0; j < 3; j++) { - field[i][j] = 0.0; - fieldp[i][j] = 0.0; - } - } -*/ - + // get the real space portion of the mutual field first MPI_Barrier(world); @@ -1086,19 +1067,13 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp) field[i][1] += term*uind[i][1]; field[i][2] += term*uind[i][2]; } + for (int i = 0; i < nlocal; i++) { fieldp[i][0] += term*uinp[i][0]; fieldp[i][1] += term*uinp[i][1]; fieldp[i][2] += term*uinp[i][2]; } -/* - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - field[i][j] += term*uind[i][j]; - fieldp[i][j] += term*uinp[i][j]; - } - } -*/ + // accumulate the field and fieldp values from the real-space portion from umutual2b() on the GPU // field and fieldp may already have some nonzero values from kspace (umutual1 and self) @@ -1271,25 +1246,6 @@ void PairHippoGPU::umutual1(double **field, double **fieldp) fieldp[i][1] -= dfy; fieldp[i][2] -= dfz; } -/* - for (int i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - dipfield1[i][j] = a[j][0]*fdip_phi1[i][1] + - a[j][1]*fdip_phi1[i][2] + a[j][2]*fdip_phi1[i][3]; - dipfield2[i][j] = a[j][0]*fdip_phi2[i][1] + - a[j][1]*fdip_phi2[i][2] + a[j][2]*fdip_phi2[i][3]; - } - } - - // increment the field at each multipole site - - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 3; j++) { - field[i][j] -= dipfield1[i][j]; - fieldp[i][j] -= dipfield2[i][j]; - } - } -*/ } /* ----------------------------------------------------------------------