From edd76733a10929ecb3149a928daf7c4399c42d2d Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 12 Sep 2021 00:51:48 -0500 Subject: [PATCH] Working on umutual2b, tdipdip are correct, but incorrect results for field and fieldp --- lib/gpu/lal_amoeba.cu | 35 ++++++++++++++++++++++++++++++++++- lib/gpu/lal_base_amoeba.cpp | 4 ++-- src/AMOEBA/amoeba_induce.cpp | 15 +++++++++++---- src/GPU/pair_amoeba_gpu.cpp | 17 ++++++++++++----- 4 files changed, 59 insertions(+), 12 deletions(-) diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index fb515c69f7..add17e2725 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -465,6 +465,10 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_, numtyp pdi = damping[itype].x; numtyp ddi = damping[itype].z; + numtyp aesq2 = (numtyp)2.0 * aewald*aewald; + numtyp aesq2n = (numtyp)0.0; + if (aewald > (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald); + for ( ; nbor_max_tep_size) { - _max_tep_size=static_cast(static_cast(nall)*1.10); + if (inum_full>_max_tep_size) { + _max_tep_size=static_cast(static_cast(inum_full)*1.10); _tep.resize(_max_tep_size*4); } *tep_ptr=_tep.host.begin(); diff --git a/src/AMOEBA/amoeba_induce.cpp b/src/AMOEBA/amoeba_induce.cpp index 2ffd4d275b..2294f543dd 100644 --- a/src/AMOEBA/amoeba_induce.cpp +++ b/src/AMOEBA/amoeba_induce.cpp @@ -279,6 +279,10 @@ void PairAmoeba::induce() crstyle = FIELD; comm->reverse_comm_pair(this); + for (int i = 0; i < 10; i++) { + printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]); + } + //error->all(FLERR,"STOP CPU"); /* if (comm->me == 0) { printf("CPU: cutghost = %f\n", comm->cutghost[0]); @@ -369,12 +373,13 @@ void PairAmoeba::induce() cfstyle = INDUCE; comm->forward_comm_pair(this); - ufield0c(field,fieldp); - - //error->all(FLERR,"STOP"); + ufield0c(field,fieldp); crstyle = FIELD; comm->reverse_comm_pair(this); + + + //error->all(FLERR,"STOP"); /* if (comm->me == 0) { printf("CPU: iter = %d\n", iter); @@ -1243,7 +1248,9 @@ void PairAmoeba::umutual2b(double **field, double **fieldp) j = jlist[jj]; uindj = uind[j]; uinpj = uinp[j]; - + //if (i==0 && j == 10) + // printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n", + // i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]); fid[0] = tdipdip[0]*uindj[0] + tdipdip[1]*uindj[1] + tdipdip[2]*uindj[2]; fid[1] = tdipdip[1]*uindj[0] + tdipdip[3]*uindj[1] + tdipdip[4]*uindj[2]; fid[2] = tdipdip[2]*uindj[0] + tdipdip[4]*uindj[1] + tdipdip[5]*uindj[2]; diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index edd51667aa..bdde1176d9 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -111,7 +111,7 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) tep_pinned = nullptr; gpu_udirect2b_ready = true; - gpu_umutual2b_ready = false; + gpu_umutual2b_ready = true; gpu_polar_real_ready = true; GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); @@ -532,6 +532,14 @@ void PairAmoebaGPU::induce() comm->reverse_comm_pair(this); } + if (comm->me == 0) { + for (int i = 0; i < 10; i++) { + printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]); + } + } + + //error->all(FLERR,"STOP GPU"); + /* if (comm->me == 0) { printf("GPU: cutghost = %f\n", comm->cutghost[0]); @@ -596,12 +604,12 @@ void PairAmoebaGPU::induce() ufield0c(field,fieldp); - //error->all(FLERR,"STOP"); - if (!gpu_umutual2b_ready) { crstyle = FIELD; comm->reverse_comm_pair(this); } + + //error->all(FLERR,"STOP"); /* if (comm->me == 0) { printf("GPU: iter = %d\n", iter); @@ -1051,7 +1059,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) error->one(FLERR,"Insufficient memory on accelerator"); // accumulate the field and fieldp values from the GPU lib - // field and fieldp may already have some nonzero values from kspace (udirect1) + // field and fieldp may already have some nonzero values from kspace (umutual1) int nlocal = atom->nlocal; double *field_ptr = (double *)fieldp_pinned; @@ -1071,7 +1079,6 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) fieldp[i][1] += fieldp_ptr[idx+1]; fieldp[i][2] += fieldp_ptr[idx+2]; } - } /* ---------------------------------------------------------------------- */