Fixed bugs in the umutual2b kernel; the field and fieldp values now seem correct

This commit is contained in:
Trung Nguyen
2021-09-13 01:11:03 -05:00
parent edd76733a1
commit bc665999d5
2 changed files with 22 additions and 21 deletions

View File

@ -463,7 +463,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
igroup = polar3[i].w; // amgroup[i];
numtyp pdi = damping[itype].x;
numtyp ddi = damping[itype].z;
numtyp pti = damping[itype].y;
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
numtyp aesq2n = (numtyp)0.0;
@ -502,16 +502,8 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
numtyp ukzp = polar5[j].z; // uinp[j][2];
numtyp factor_uscale;
//const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
//factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
if (igroup == jgroup) {
//factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
//factor_dscale = polar_dscale;
factor_uscale = polar_uscale;
} else {
//factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
factor_uscale = (numtyp)1.0;
}
if (igroup == jgroup) factor_uscale = polar_uscale;
else factor_uscale = (numtyp)1.0;
// calculate the real space Ewald error function terms
@ -535,15 +527,14 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
numtyp scale5 = (numtyp)1.0;
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
if (damp != (numtyp)0.0) {
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
if (pgamma != (numtyp)0.0) {
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
if (damp < (numtyp)50.0) {
numtyp expdamp = ucl_exp(-damp);
scale3 = (numtyp)1.0 - expdamp;
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
}
numtyp pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
if (damp < (numtyp)50.0) {
numtyp expdamp = ucl_exp(-damp);
scale3 = (numtyp)1.0 - expdamp;
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
}
} else { // damp == 0: ???
}

View File

@ -524,6 +524,14 @@ void PairAmoebaGPU::induce()
uinp[i][0], uinp[i][1], uinp[i][2]);
}
}
*/
/*
if (comm->me == 0) {
printf("GPU before\n");
for (int i = 0; i < 10; i++) {
printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]);
}
}
*/
ufield0c(field,fieldp);
@ -531,12 +539,14 @@ void PairAmoebaGPU::induce()
crstyle = FIELD;
comm->reverse_comm_pair(this);
}
/*
if (comm->me == 0) {
printf("GPU after \n");
for (int i = 0; i < 10; i++) {
printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]);
}
}
*/
//error->all(FLERR,"STOP GPU");
@ -841,7 +851,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
// rebuild dipole-dipole pair list and store pairwise dipole matrices
// done one atom at a time in real-space double loop over atoms & neighs
udirect2b_cpu();
//udirect2b_cpu();
// accumulate the field and fieldp values from the GPU lib
// field and fieldp may already have some nonzero values from kspace (udirect1)