Cleaned up debug statements and unused sections in the amoeba and hippo gpu styles

This commit is contained in:
Trung Nguyen
2023-01-14 20:02:36 -06:00
parent 03e48f2658
commit c21f2faa1f
2 changed files with 22 additions and 366 deletions

View File

@ -13,7 +13,7 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (Northwestern)
Contributing author: Trung Nguyen (Northwestern/UChicago)
------------------------------------------------------------------------- */
#include "pair_amoeba_gpu.h"
@ -486,8 +486,6 @@ void PairAmoebaGPU::induce()
comm->reverse_comm(this);
}
//error->all(FLERR,"STOP GPU");
// set initial conjugate gradient residual and conjugate vector
for (i = 0; i < nlocal; i++) {
@ -547,8 +545,6 @@ void PairAmoebaGPU::induce()
comm->reverse_comm(this);
}
//error->all(FLERR,"STOP");
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
uind[i][j] = vec[i][j];
@ -1751,166 +1747,6 @@ void PairAmoebaGPU::polar_kspace()
}
}
// account for dipole response terms in the TCG method
/*
if (poltyp == TCG) {
for (m = 0; m < tcgnab; m++) {
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
fuind[i][j] = a[0][j]*uad[i][m][0] + a[1][j]*uad[i][m][1] +
a[2][j]*uad[i][m][2];
fuinp[i][j] = a[0][j]*ubp[i][m][0] + a[1][j]*ubp[i][m][1] +
a[2][j]*ubp[i][m][2];
}
}
grid_uind(fuind,fuinp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
for (k = 0; k < nfft3; k++) {
for (j = 0; j < nfft2; j++) {
for (i = 0; i < nfft1; i++) {
term = qfac[k][j][i];
qgrid[k][j][i][0] *= term;
qgrid[k][j][i][1] *= term;
}
}
}
efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1);
fphi_uind(fphid,fphip,fphidp);
for (i = 0; i < nlocal; i++) {
for (j = 1; j < 10; j++) {
fphid[i][j] *= felec;
fphip[i][j] *= felec;
}
}
for (i = 0; i < nlocal; i++) {
f1 = 0.0;
f2 = 0.0;
f3 = 0.0;
for (k = 0; k < 3; k++) {
j1 = deriv1[k+1];
j2 = deriv2[k+1];
j3 = deriv3[k+1];
f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1];
f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2];
f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3];
}
f1 *= 0.5 * nfft1;
f2 *= 0.5 * nfft2;
f3 *= 0.5 * nfft3;
h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3;
h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3;
h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3;
f[i][0] -= h1;
f[i][1] -= h2;
f[i][2] -= h3;
for (j = 1; j < 4; j++) {
cphid[j] = 0.0;
cphip[j] = 0.0;
for (k = 1; k < 4; k++) {
cphid[j] += ftc[j][k]*fphid[i][k];
cphip[j] += ftc[j][k]*fphip[i][k];
}
}
vxx -= 0.5*(cphid[1]*ubp[i][m][0] + cphip[1]*uad[i][m][0]);
vyy -= 0.5*(cphid[2]*ubp[i][m][1] + cphip[2]*uad[i][m][1]);
vzz -= 0.5*(cphid[3]*ubp[i][m][2] + cphip[3]*uad[i][m][2]);
vxy -= 0.25*(cphid[1]*ubp[i][m][1] + cphip[1]*uad[i][m][1] +
cphid[2]*ubp[i][m][0] + cphip[2]*uad[i][m][0]);
vyz -= 0.25*(cphid[1]*ubp[i][m][2] + cphip[1]*uad[i][m][2] +
cphid[3]*ubp[i][m][0] + cphip[3]*uad[i][m][0]);
vxz -= 0.25*(cphid[2]*ubp[i][m][2] + cphip[2]*uad[i][m][2] +
cphid[3]*ubp[i][m][1] + cphip[3]*uad[i][m][1]);
}
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
fuind[i][j] = a[0][j]*ubd[i][m][0] + a[1][j]*ubd[i][m][1] +
a[2][j]*ubd[i][m][2];
fuinp[i][j] = a[0][j]*uap[i][m][0] + a[1][j]*uap[i][m][1] +
a[2][j]*uap[i][m][2];
}
}
grid_uind(fuind,fuinp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
for (k = 0; k < nfft3; k++) {
for (j = 0; j < nfft2; j++) {
for (i = 0; i < nfft1; i++) {
term = qfac[k][j][i];
qgrid[k][j][i][0] *= term;
qgrid[k][j][i][1] *= term;
}
}
}
efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1);
fphi_uind(fphid,fphip,fphidp);
for (i = 0; i < nlocal; i++) {
for (j = 1; j < 10; j++) {
fphid[i][j] *= felec;
fphip[i][j] *= felec;
}
}
for (i = 0; i < nlocal; i++) {
f1 = 0.0;
f2 = 0.0;
f3 = 0.0;
for (k = 0; k < 3; k++) {
j1 = deriv1[k+1];
j2 = deriv2[k+1];
j3 = deriv3[k+1];
f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1];
f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2];
f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3];
}
f1 *= 0.5 * nfft1;
f2 *= 0.5 * nfft2;
f3 *= 0.5 * nfft3;
h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3; // matvec
h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3;
h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3;
f[i][0] -= h1;
f[i][1] -= h2;
f[i][2] -= h3;
for (j = 1; j < 4; j++) {
cphid[j] = 0.0;
cphip[j] = 0.0;
for (k = 1; k < 4; k++) {
cphid[j] += ftc[j][k]*fphid[i][k];
cphip[j] += ftc[j][k]*fphip[i][k];
}
}
vxx -= 0.5*(cphid[1]*uap[i][m][0] + cphip[1]*ubd[i][m][0]);
vyy -= 0.5*(cphid[2]*uap[i][m][1] + cphip[2]*ubd[i][m][1]);
vzz -= 0.5*(cphid[3]*uap[i][m][2] + cphip[3]*ubd[i][m][2]);
vxy -= 0.25*(cphid[1]*uap[i][m][1] + cphip[1]*ubd[i][m][1] +
cphid[2]*uap[i][m][0] + cphip[2]*ubd[i][m][0]);
vxz -= 0.25*(cphid[1]*uap[i][m][2] + cphip[1]*ubd[i][m][2] +
cphid[3]*uap[i][m][0] + cphip[3]*ubd[i][m][0]);
vyz -= 0.25*(cphid[2]*uap[i][m][2] + cphip[2]*ubd[i][m][2] +
cphid[3]*uap[i][m][1] + cphip[3]*ubd[i][m][1]);
}
}
}
*/
// assign permanent and induced multipoles to the PME grid
for (i = 0; i < nlocal; i++) {
@ -2097,142 +1933,6 @@ void PairAmoebaGPU::polar_kspace()
}
}
// add back missing terms for the TCG polarization method;
// first do the term for "UAD" dotted with "UBP"
/*
if (poltyp == TCG) {
for (m = 0; m < tcgnab; m++) {
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 10; j++)
cmp[i][j] = 0.0;
for (j = 1; j < 4; j++)
cmp[i][j] = ubp[i][m][j-1];
}
cmp_to_fmp(cmp,fmp);
grid_mpole(fmp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
for (k = 0; k < nfft3; k++) {
for (j = 0; j < nfft2; j++) {
for (i = 0; i < nfft1; i++) {
qgrip[k][j][i][0] = qgrid[k][j][i][0];
qgrip[k][j][i][1] = qgrid[k][j][i][1];
}
}
}
for (i = 0; i < nlocal; i++) {
for (j = 1; j < 4; j++)
cmp[i][j] = uad[i][m][j-1];
}
cmp_to_fmp(cmp,fmp);
grid_mpole(fmp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
// make the scalar summation over reciprocal lattice
// NOTE: this loop has to be distributed for parallel
// NOTE: why does this one include m = 0 ?
for (m = 1; m < ntot; m++) {
k1 = m % nfft1;
k2 = (m % nff) / nfft1;
k3 = m/nff;
r1 = (k1 >= nf1) ? k1-nfft1 : k1;
r2 = (k2 >= nf2) ? k2-nfft2 : k2;
r3 = (k3 >= nf3) ? k3-nfft3 : k3;
h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3;
h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
hsq = h1*h1 + h2*h2 + h3*h3;
term = -pterm * hsq;
expterm = 0.0;
if (term > -50.0 && hsq != 0.0) {
denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3];
expterm = exp(term) / denom;
struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] +
qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1];
eterm = 0.5 * felec * expterm * struc2;
vterm = (2.0/hsq) * (1.0-term) * eterm;
virpolar[0] -= h1*h1*vterm - eterm;
virpolar[1] -= h2*h2*vterm - eterm;
virpolar[2] -= h3*h3*vterm - eterm;
virpolar[3] -= h1*h2*vterm;
virpolar[4] -= h1*h3*vterm;
virpolar[5] -= h2*h3*vterm;
}
}
// now do the TCG terms with "UBD" dotted with "UAP"
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 10; j++)
cmp[i][j] = 0.0;
for (j = 1; j < 4; j++)
cmp[i][j] = uap[i][m][j-1];
}
cmp_to_fmp(cmp,fmp);
grid_mpole(fmp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
for (k = 0; k < nfft3; k++) {
for (j = 0; j < nfft2; j++) {
for (i = 0; i < nfft1; i++) {
qgrip[k][j][i][0] = qgrid[k][j][i][0];
qgrip[k][j][i][1] = qgrid[k][j][i][1];
}
}
}
for (i = 0; i < nlocal; i++) {
for (j = 1; j < 4; j++)
cmp[i][j] = ubd[i][m][j-1];
}
cmp_to_fmp(cmp,fmp);
grid_mpole(fmp);
efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
// make the scalar summation over reciprocal lattice
// NOTE: this loop has to be distributed for parallel
// NOTE: why does this one include m = 0 ?
for (m = 1; m < ntot; m++) {
k1 = m % nfft1;
k2 = (m % nff) / nfft1;
k3 = m/nff;
r1 = (k1 >= nf1) ? k1-nfft1 : k1;
r2 = (k2 >= nf2) ? k2-nfft2 : k2;
r3 = (k3 >= nf3) ? k3-nfft3 : k3;
h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3;
h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
hsq = h1*h1 + h2*h2 + h3*h3;
term = -pterm * hsq;
expterm = 0.0;
if (term > -50.0 && hsq != 0.0) {
denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3];
expterm = exp(term) / denom;
struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] +
qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1];
eterm = 0.5 * felec * expterm * struc2;
vterm = (2.0/hsq) * (1.0-term) * eterm;
virpolar[0] -= h1*h1*vterm - eterm;
virpolar[1] -= h2*h2*vterm - eterm;
virpolar[2] -= h3*h3*vterm - eterm;
virpolar[3] -= h1*h2*vterm;
virpolar[4] -= h1*h3*vterm;
virpolar[5] -= h2*h3*vterm;
}
}
}
}
*/
// increment the total internal virial tensor components
if (vflag_global) {

View File

@ -13,7 +13,7 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (Northwestern)
Contributing author: Trung Nguyen (Northwestern/UChicago)
------------------------------------------------------------------------- */
#include "pair_hippo_gpu.h"
@ -208,14 +208,14 @@ void PairHippoGPU::init_style()
int tq_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
pdamp, thole, dirdamp, amtype2class,
special_repel, special_disp, special_mpole,
special_polar_wscale, special_polar_piscale,
special_polar_pscale, sizpr, dmppr, elepr,
csix, adisp, pcore, palpha,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, maxspecial15, cell_size, gpu_mode,
screen, polar_dscale, polar_uscale, tq_size);
pdamp, thole, dirdamp, amtype2class,
special_repel, special_disp, special_mpole,
special_polar_wscale, special_polar_piscale,
special_polar_pscale, sizpr, dmppr, elepr,
csix, adisp, pcore, palpha,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, maxspecial15, cell_size, gpu_mode,
screen, polar_dscale, polar_uscale, tq_size);
GPU_EXTRA::check_flag(success,error,world);
if (gpu_mode == GPU_FORCE)
@ -271,14 +271,14 @@ void PairHippoGPU::repulsion()
inum = atom->nlocal;
firstneigh = hippo_gpu_precompute(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, rpole,
nullptr, nullptr, nullptr,
sublo, subhi, atom->tag,
atom->nspecial, atom->special,
atom->nspecial15, atom->special15,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
atom->type, amtype, amgroup, rpole,
nullptr, nullptr, nullptr,
sublo, subhi, atom->tag,
atom->nspecial, atom->special,
atom->nspecial15, atom->special15,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
// select the correct cutoff for the term
@ -480,14 +480,6 @@ void PairHippoGPU::induce()
}
}
}
/*
printf("GPU: cutghost = %f\n", comm->cutghost[0]);
for (i = 0; i < 10; i++) {
printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
i, udir[i][0], udir[i][1], udir[i][2],
udirp[i][0], udirp[i][1], udirp[i][2]);
}
*/
// allocate memory and make early host-device transfers
// must be done before the first ufield0c
@ -611,8 +603,6 @@ void PairHippoGPU::induce()
comm->reverse_comm(this);
}
//error->all(FLERR,"STOP GPU");
// set initial conjugate gradient residual and conjugate vector
for (i = 0; i < nlocal; i++) {
@ -1022,7 +1012,7 @@ void PairHippoGPU::udirect2b_cpu()
tdipdip[ndip++] = bcn[1]*yr*zr;
tdipdip[ndip++] = -bcn[0] + bcn[1]*zr*zr;
} else {
if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
}
} // jj
@ -1055,16 +1045,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
memset(&field[0][0], 0, 3*nall *sizeof(double));
memset(&fieldp[0][0], 0, 3*nall *sizeof(double));
/*
for (int i = 0; i < nall; i++) {
for (int j = 0; j < 3; j++) {
field[i][j] = 0.0;
fieldp[i][j] = 0.0;
}
}
*/
// get the real space portion of the mutual field first
MPI_Barrier(world);
@ -1086,19 +1067,13 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
field[i][1] += term*uind[i][1];
field[i][2] += term*uind[i][2];
}
for (int i = 0; i < nlocal; i++) {
fieldp[i][0] += term*uinp[i][0];
fieldp[i][1] += term*uinp[i][1];
fieldp[i][2] += term*uinp[i][2];
}
/*
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
field[i][j] += term*uind[i][j];
fieldp[i][j] += term*uinp[i][j];
}
}
*/
// accumulate the field and fieldp values from the real-space portion from umutual2b() on the GPU
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
@ -1271,25 +1246,6 @@ void PairHippoGPU::umutual1(double **field, double **fieldp)
fieldp[i][1] -= dfy;
fieldp[i][2] -= dfz;
}
/*
for (int i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
dipfield1[i][j] = a[j][0]*fdip_phi1[i][1] +
a[j][1]*fdip_phi1[i][2] + a[j][2]*fdip_phi1[i][3];
dipfield2[i][j] = a[j][0]*fdip_phi2[i][1] +
a[j][1]*fdip_phi2[i][2] + a[j][2]*fdip_phi2[i][3];
}
}
// increment the field at each multipole site
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
field[i][j] -= dipfield1[i][j];
fieldp[i][j] -= dipfield2[i][j];
}
}
*/
}
/* ----------------------------------------------------------------------