diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index adcff0e648..c4f146a7c9 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -116,9 +116,13 @@ _texture( q_tex,int2); } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ @@ -166,9 +170,13 @@ _texture( q_tex,int2); } \ if (offset==0 && ii_field.cols(), _max_fieldp_size); + + printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n", + this->_fieldp.cols(), _max_fieldp_size); for (int i = 0; i < 10; i++) { numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]); printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z); } -*/ + return firstneigh; //nbor->host_jlist.begin()-host_start; } diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index d87e35cdf8..6501376dfa 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -367,10 +367,10 @@ void PairAmoebaGPU::induce() dfield0c(field,fieldp); // reverse comm to sum field,fieldp from ghost atoms to owned atoms - +/* crstyle = FIELD; comm->reverse_comm_pair(this); - +*/ // set induced dipoles to polarizability times direct field @@ -778,7 +778,7 @@ void PairAmoebaGPU::dfield0c(double **field, double **fieldp) void PairAmoebaGPU::udirect2b(double **field, double **fieldp) { - bool gpu_udirect2b_ready = false; + bool gpu_udirect2b_ready = true; if (!gpu_udirect2b_ready) { PairAmoeba::udirect2b(field, fieldp); return; @@ -815,31 +815,33 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp) if (!success) error->one(FLERR,"Insufficient memory on accelerator"); - // get field and fieldp values from the GPU lib + // rebuild dipole-dipole pair list and store pairwise dipole matrices + // done one atom at a time in real-space double loop over atoms & neighs + + udirect2b_cpu(); + + // accumulate the field and fieldp values from the GPU lib + // field and fieldp may already have some nonzero values from kspace (udirect1) int nlocal = atom->nlocal; double *field_ptr = (double *)fieldp_pinned; for (int i = 0; i < nlocal; i++) { int idx = 4*i; - field[i][0] = field_ptr[idx]; - field[i][1] = field_ptr[idx+1]; - field[i][2] = field_ptr[idx+2]; + field[i][0] += field_ptr[idx]; + field[i][1] += field_ptr[idx+1]; + field[i][2] += field_ptr[idx+2]; } double* fieldp_ptr = (double *)fieldp_pinned; fieldp_ptr += 4*inum; for (int i = 0; i < nlocal; i++) { int idx = 4*i; - fieldp[i][0] = fieldp_ptr[idx]; - fieldp[i][1] = fieldp_ptr[idx+1]; - fieldp[i][2] = fieldp_ptr[idx+2]; + fieldp[i][0] += fieldp_ptr[idx]; + fieldp[i][1] += fieldp_ptr[idx+1]; + fieldp[i][2] += fieldp_ptr[idx+2]; } - - // rebuild dipole-dipole pair list and store pairwise dipole matrices - // done one atom at a time in real-space double loop over atoms & neighs - - udirect2b_cpu(); + } /* ----------------------------------------------------------------------