diff --git a/lib/gpu/lal_dpd.cpp b/lib/gpu/lal_dpd.cpp index f890fb53a3..1f5209852b 100644 --- a/lib/gpu/lal_dpd.cpp +++ b/lib/gpu/lal_dpd.cpp @@ -99,16 +99,25 @@ int DPDT::init(const int ntypes, cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); + double special_sqrt[4]; + special_sqrt[0] = sqrt(host_special_lj[0]); + special_sqrt[1] = sqrt(host_special_lj[1]); + special_sqrt[2] = sqrt(host_special_lj[2]); + special_sqrt[3] = sqrt(host_special_lj[3]); + UCL_H_Vec dview; sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); dview.view(host_special_lj,4,*(this->ucl_device)); ucl_copy(sp_lj,dview,false); + sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(special_sqrt,4,*(this->ucl_device)); + ucl_copy(sp_sqrt,dview,false); _tstat_only = 0; if (tstat_only) _tstat_only=1; _allocated=true; - this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes(); + this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes(); return 0; } @@ -121,6 +130,7 @@ void DPDT::clear() { coeff.clear(); cutsq.clear(); sp_lj.clear(); + sp_sqrt.clear(); this->clear_atomic(); } @@ -144,7 +154,7 @@ int DPDT::loop(const int eflag, const int vflag) { this->time_pair.start(); if (shared_types) { this->k_pair_sel->set_size(GX,BX); - this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, + this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq, @@ -152,7 +162,7 @@ int DPDT::loop(const int eflag, const int vflag) { &this->_tstat_only, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); - this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, + this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, diff --git a/lib/gpu/lal_dpd.cu b/lib/gpu/lal_dpd.cu index 2794110a92..0c861f51de 100644 --- a/lib/gpu/lal_dpd.cu +++ b/lib/gpu/lal_dpd.cu @@ -165,6 +165,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict coeff, const int lj_types, const __global numtyp *restrict sp_lj, + const __global numtyp *restrict sp_sqrt, const __global int * dev_nbor, const __global int * dev_packed, __global acctyp4 *restrict ans, @@ -200,11 +201,12 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_, numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i]; int itag=iv.w; - numtyp factor_dpd; + numtyp factor_dpd, factor_sqrt; for ( ; nbor { UCL_D_Vec cutsq; /// Special LJ values - UCL_D_Vec sp_lj; + UCL_D_Vec sp_lj, sp_sqrt; /// If atom type constants fit in shared memory, use fast kernels bool shared_types; diff --git a/src/GPU/pair_dpd_gpu.cpp b/src/GPU/pair_dpd_gpu.cpp index 79fd3f1345..716978deac 100644 --- a/src/GPU/pair_dpd_gpu.cpp +++ b/src/GPU/pair_dpd_gpu.cpp @@ -313,7 +313,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in int i, j, ii, jj, jnum, itype, jtype; double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair; double vxtmp, vytmp, vztmp, delvx, delvy, delvz; - double rsq, r, rinv, dot, wd, randnum, factor_dpd; + double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt; int *jlist; tagint itag, jtag; @@ -344,6 +344,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_dpd = special_lj[sbmask(j)]; + factor_sqrt = special_sqrt[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; @@ -376,10 +377,11 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in // drag force = -gamma * wd^2 * (delx dot delv) / r // random force = sigma * wd * rnd * dtinvsqrt; - fpair = a0[itype][jtype] * wd; - fpair -= gamma[itype][jtype] * wd * wd * dot * rinv; - fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt; - fpair *= factor_dpd * rinv; + fpair = a0[itype][jtype]*wd; + fpair -= gamma[itype][jtype]*wd*wd*dot*rinv; + fpair *= factor_dpd; + fpair += factor_sqrt*sigma[itype][jtype]*wd*randnum*dtinvsqrt; + fpair *= rinv; f[i][0] += delx * fpair; f[i][1] += dely * fpair; diff --git a/src/GPU/pair_dpd_tstat_gpu.cpp b/src/GPU/pair_dpd_tstat_gpu.cpp index 49dacf3f1e..029bf7245e 100644 --- a/src/GPU/pair_dpd_tstat_gpu.cpp +++ b/src/GPU/pair_dpd_tstat_gpu.cpp @@ -329,7 +329,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v int i, j, ii, jj, jnum, itype, jtype; double xtmp, ytmp, ztmp, delx, dely, delz, fpair; double vxtmp, vytmp, vztmp, delvx, delvy, delvz; - double rsq, r, rinv, dot, wd, randnum, factor_dpd; + double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt; int *jlist; tagint itag, jtag; @@ -360,6 +360,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_dpd = special_lj[sbmask(j)]; + factor_sqrt = special_sqrt[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; @@ -392,9 +393,9 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v // drag force = -gamma * wd^2 * (delx dot delv) / r // random force = sigma * wd * rnd * dtinvsqrt; - fpair = -gamma[itype][jtype] * wd * wd * dot * rinv; - fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt; - fpair *= factor_dpd * rinv; + fpair = -factor_dpd * gamma[itype][jtype] * wd * wd * dot * rinv; + fpair += factor_sqrt * sigma[itype][jtype] * wd * randnum * dtinvsqrt; + fpair *= rinv; f[i][0] += delx * fpair; f[i][1] += dely * fpair;