port DPD exclusions corrections to GPU package

This commit is contained in:
Axel Kohlmeyer
2023-01-02 12:04:10 -05:00
parent 37b3ba827f
commit 396d577f40
5 changed files with 43 additions and 20 deletions

View File

@ -99,16 +99,25 @@ int DPDT::init(const int ntypes,
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
double special_sqrt[4];
special_sqrt[0] = sqrt(host_special_lj[0]);
special_sqrt[1] = sqrt(host_special_lj[1]);
special_sqrt[2] = sqrt(host_special_lj[2]);
special_sqrt[3] = sqrt(host_special_lj[3]);
UCL_H_Vec<double> dview;
sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
dview.view(host_special_lj,4,*(this->ucl_device));
ucl_copy(sp_lj,dview,false);
sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
dview.view(special_sqrt,4,*(this->ucl_device));
ucl_copy(sp_sqrt,dview,false);
_tstat_only = 0;
if (tstat_only) _tstat_only=1;
_allocated=true;
this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes();
this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes();
return 0;
}
@ -121,6 +130,7 @@ void DPDT::clear() {
coeff.clear();
cutsq.clear();
sp_lj.clear();
sp_sqrt.clear();
this->clear_atomic();
}
@ -144,7 +154,7 @@ int DPDT::loop(const int eflag, const int vflag) {
this->time_pair.start();
if (shared_types) {
this->k_pair_sel->set_size(GX,BX);
this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj,
this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, &sp_sqrt,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag,
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
@ -152,7 +162,7 @@ int DPDT::loop(const int eflag, const int vflag) {
&this->_tstat_only, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj,
this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, &sp_sqrt,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag, &vflag,
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,

View File

@ -165,6 +165,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict coeff,
const int lj_types,
const __global numtyp *restrict sp_lj,
const __global numtyp *restrict sp_sqrt,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@ -200,11 +201,12 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w;
numtyp factor_dpd;
numtyp factor_dpd, factor_sqrt;
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
factor_dpd = sp_lj[sbmask(j)];
factor_sqrt = sp_sqrt[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
@ -245,8 +247,9 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
numtyp force = (numtyp)0.0;
if (!tstat_only) force = coeff[mtype].x*wd;
force -= coeff[mtype].y*wd*wd*dot*rinv;
force += coeff[mtype].z*wd*randnum*dtinvsqrt;
force*=factor_dpd*rinv;
force *= factor_dpd;
force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
force*=rinv;
f.x+=delx*force;
f.y+=dely*force;
@ -278,6 +281,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict coeff_in,
const __global numtyp *restrict sp_lj_in,
const __global numtyp *restrict sp_sqrt_in,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@ -295,8 +299,10 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
#ifndef ONETYPE
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
__local numtyp sp_lj[4];
__local numtyp sp_sqrt[4];
if (tid<4)
sp_lj[tid]=sp_lj_in[tid];
sp_sqrt[tid]=sp_sqrt_in[tid];
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
coeff[tid]=coeff_in[tid];
}
@ -333,12 +339,15 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w;
numtyp factor_dpd;
#ifndef ONETYPE
numtyp factor_dpd, factor_sqrt;
#endif
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
#ifndef ONETYPE
factor_dpd = sp_lj[sbmask(j)];
factor_sqrt = sp_sqrt[sbmask(j)];
j &= NEIGHMASK;
#endif
@ -390,12 +399,13 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
numtyp force = (numtyp)0.0;
if (!tstat_only) force = coeffx*wd;
force -= coeffy*wd*wd*dot*rinv;
force += coeffz*wd*randnum*dtinvsqrt;
#ifndef ONETYPE
force*=factor_dpd*rinv;
force *= factor_dpd;
force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt;
#else
force*=rinv;
force += coeffz*wd*randnum*dtinvsqrt;
#endif
force*=rinv;
f.x+=delx*force;
f.y+=dely*force;

View File

@ -65,7 +65,7 @@ class DPD : public BaseDPD<numtyp, acctyp> {
UCL_D_Vec<numtyp> cutsq;
/// Special LJ values
UCL_D_Vec<numtyp> sp_lj;
UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
/// If atom type constants fit in shared memory, use fast kernels
bool shared_types;

View File

@ -313,7 +313,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt;
int *jlist;
tagint itag, jtag;
@ -344,6 +344,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_dpd = special_lj[sbmask(j)];
factor_sqrt = special_sqrt[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
@ -376,10 +377,11 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
// drag force = -gamma * wd^2 * (delx dot delv) / r
// random force = sigma * wd * rnd * dtinvsqrt;
fpair = a0[itype][jtype] * wd;
fpair -= gamma[itype][jtype] * wd * wd * dot * rinv;
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
fpair *= factor_dpd * rinv;
fpair = a0[itype][jtype]*wd;
fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
fpair *= factor_dpd;
fpair += factor_sqrt*sigma[itype][jtype]*wd*randnum*dtinvsqrt;
fpair *= rinv;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;

View File

@ -329,7 +329,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, fpair;
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt;
int *jlist;
tagint itag, jtag;
@ -360,6 +360,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_dpd = special_lj[sbmask(j)];
factor_sqrt = special_sqrt[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
@ -392,9 +393,9 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
// drag force = -gamma * wd^2 * (delx dot delv) / r
// random force = sigma * wd * rnd * dtinvsqrt;
fpair = -gamma[itype][jtype] * wd * wd * dot * rinv;
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
fpair *= factor_dpd * rinv;
fpair = -factor_dpd * gamma[itype][jtype] * wd * wd * dot * rinv;
fpair += factor_sqrt * sigma[itype][jtype] * wd * randnum * dtinvsqrt;
fpair *= rinv;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;