port DPD exclusions corrections to GPU package
This commit is contained in:
@ -99,16 +99,25 @@ int DPDT::init(const int ntypes,
|
|||||||
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
|
this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
|
||||||
|
|
||||||
|
double special_sqrt[4];
|
||||||
|
special_sqrt[0] = sqrt(host_special_lj[0]);
|
||||||
|
special_sqrt[1] = sqrt(host_special_lj[1]);
|
||||||
|
special_sqrt[2] = sqrt(host_special_lj[2]);
|
||||||
|
special_sqrt[3] = sqrt(host_special_lj[3]);
|
||||||
|
|
||||||
UCL_H_Vec<double> dview;
|
UCL_H_Vec<double> dview;
|
||||||
sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
dview.view(host_special_lj,4,*(this->ucl_device));
|
dview.view(host_special_lj,4,*(this->ucl_device));
|
||||||
ucl_copy(sp_lj,dview,false);
|
ucl_copy(sp_lj,dview,false);
|
||||||
|
sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
dview.view(special_sqrt,4,*(this->ucl_device));
|
||||||
|
ucl_copy(sp_sqrt,dview,false);
|
||||||
|
|
||||||
_tstat_only = 0;
|
_tstat_only = 0;
|
||||||
if (tstat_only) _tstat_only=1;
|
if (tstat_only) _tstat_only=1;
|
||||||
|
|
||||||
_allocated=true;
|
_allocated=true;
|
||||||
this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes();
|
this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -121,6 +130,7 @@ void DPDT::clear() {
|
|||||||
coeff.clear();
|
coeff.clear();
|
||||||
cutsq.clear();
|
cutsq.clear();
|
||||||
sp_lj.clear();
|
sp_lj.clear();
|
||||||
|
sp_sqrt.clear();
|
||||||
this->clear_atomic();
|
this->clear_atomic();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,7 +154,7 @@ int DPDT::loop(const int eflag, const int vflag) {
|
|||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
if (shared_types) {
|
if (shared_types) {
|
||||||
this->k_pair_sel->set_size(GX,BX);
|
this->k_pair_sel->set_size(GX,BX);
|
||||||
this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj,
|
this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, &sp_sqrt,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->ans->force, &this->ans->engv, &eflag,
|
&this->ans->force, &this->ans->engv, &eflag,
|
||||||
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
|
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
|
||||||
@ -152,7 +162,7 @@ int DPDT::loop(const int eflag, const int vflag) {
|
|||||||
&this->_tstat_only, &this->_threads_per_atom);
|
&this->_tstat_only, &this->_threads_per_atom);
|
||||||
} else {
|
} else {
|
||||||
this->k_pair.set_size(GX,BX);
|
this->k_pair.set_size(GX,BX);
|
||||||
this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj,
|
this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, &sp_sqrt,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||||
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,
|
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,
|
||||||
|
|||||||
@ -165,6 +165,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const int lj_types,
|
const int lj_types,
|
||||||
const __global numtyp *restrict sp_lj,
|
const __global numtyp *restrict sp_lj,
|
||||||
|
const __global numtyp *restrict sp_sqrt,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
@ -200,11 +201,12 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||||
int itag=iv.w;
|
int itag=iv.w;
|
||||||
|
|
||||||
numtyp factor_dpd;
|
numtyp factor_dpd, factor_sqrt;
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
int j=dev_packed[nbor];
|
int j=dev_packed[nbor];
|
||||||
factor_dpd = sp_lj[sbmask(j)];
|
factor_dpd = sp_lj[sbmask(j)];
|
||||||
|
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||||
j &= NEIGHMASK;
|
j &= NEIGHMASK;
|
||||||
|
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
@ -245,8 +247,9 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
numtyp force = (numtyp)0.0;
|
numtyp force = (numtyp)0.0;
|
||||||
if (!tstat_only) force = coeff[mtype].x*wd;
|
if (!tstat_only) force = coeff[mtype].x*wd;
|
||||||
force -= coeff[mtype].y*wd*wd*dot*rinv;
|
force -= coeff[mtype].y*wd*wd*dot*rinv;
|
||||||
force += coeff[mtype].z*wd*randnum*dtinvsqrt;
|
force *= factor_dpd;
|
||||||
force*=factor_dpd*rinv;
|
force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||||
|
force*=rinv;
|
||||||
|
|
||||||
f.x+=delx*force;
|
f.x+=delx*force;
|
||||||
f.y+=dely*force;
|
f.y+=dely*force;
|
||||||
@ -278,6 +281,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp4 *restrict coeff_in,
|
const __global numtyp4 *restrict coeff_in,
|
||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
|
const __global numtyp *restrict sp_sqrt_in,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
@ -295,8 +299,10 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
#ifndef ONETYPE
|
#ifndef ONETYPE
|
||||||
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||||
__local numtyp sp_lj[4];
|
__local numtyp sp_lj[4];
|
||||||
|
__local numtyp sp_sqrt[4];
|
||||||
if (tid<4)
|
if (tid<4)
|
||||||
sp_lj[tid]=sp_lj_in[tid];
|
sp_lj[tid]=sp_lj_in[tid];
|
||||||
|
sp_sqrt[tid]=sp_sqrt_in[tid];
|
||||||
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
|
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
|
||||||
coeff[tid]=coeff_in[tid];
|
coeff[tid]=coeff_in[tid];
|
||||||
}
|
}
|
||||||
@ -333,12 +339,15 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||||
int itag=iv.w;
|
int itag=iv.w;
|
||||||
|
|
||||||
numtyp factor_dpd;
|
#ifndef ONETYPE
|
||||||
|
numtyp factor_dpd, factor_sqrt;
|
||||||
|
#endif
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
int j=dev_packed[nbor];
|
int j=dev_packed[nbor];
|
||||||
#ifndef ONETYPE
|
#ifndef ONETYPE
|
||||||
factor_dpd = sp_lj[sbmask(j)];
|
factor_dpd = sp_lj[sbmask(j)];
|
||||||
|
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||||
j &= NEIGHMASK;
|
j &= NEIGHMASK;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -390,12 +399,13 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
numtyp force = (numtyp)0.0;
|
numtyp force = (numtyp)0.0;
|
||||||
if (!tstat_only) force = coeffx*wd;
|
if (!tstat_only) force = coeffx*wd;
|
||||||
force -= coeffy*wd*wd*dot*rinv;
|
force -= coeffy*wd*wd*dot*rinv;
|
||||||
force += coeffz*wd*randnum*dtinvsqrt;
|
|
||||||
#ifndef ONETYPE
|
#ifndef ONETYPE
|
||||||
force*=factor_dpd*rinv;
|
force *= factor_dpd;
|
||||||
|
force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt;
|
||||||
#else
|
#else
|
||||||
force*=rinv;
|
force += coeffz*wd*randnum*dtinvsqrt;
|
||||||
#endif
|
#endif
|
||||||
|
force*=rinv;
|
||||||
|
|
||||||
f.x+=delx*force;
|
f.x+=delx*force;
|
||||||
f.y+=dely*force;
|
f.y+=dely*force;
|
||||||
|
|||||||
@ -65,7 +65,7 @@ class DPD : public BaseDPD<numtyp, acctyp> {
|
|||||||
UCL_D_Vec<numtyp> cutsq;
|
UCL_D_Vec<numtyp> cutsq;
|
||||||
|
|
||||||
/// Special LJ values
|
/// Special LJ values
|
||||||
UCL_D_Vec<numtyp> sp_lj;
|
UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
|
||||||
|
|
||||||
/// If atom type constants fit in shared memory, use fast kernels
|
/// If atom type constants fit in shared memory, use fast kernels
|
||||||
bool shared_types;
|
bool shared_types;
|
||||||
|
|||||||
@ -313,7 +313,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
|
|||||||
int i, j, ii, jj, jnum, itype, jtype;
|
int i, j, ii, jj, jnum, itype, jtype;
|
||||||
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
|
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
|
||||||
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
||||||
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
|
double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt;
|
||||||
int *jlist;
|
int *jlist;
|
||||||
tagint itag, jtag;
|
tagint itag, jtag;
|
||||||
|
|
||||||
@ -344,6 +344,7 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
|
|||||||
for (jj = 0; jj < jnum; jj++) {
|
for (jj = 0; jj < jnum; jj++) {
|
||||||
j = jlist[jj];
|
j = jlist[jj];
|
||||||
factor_dpd = special_lj[sbmask(j)];
|
factor_dpd = special_lj[sbmask(j)];
|
||||||
|
factor_sqrt = special_sqrt[sbmask(j)];
|
||||||
j &= NEIGHMASK;
|
j &= NEIGHMASK;
|
||||||
|
|
||||||
delx = xtmp - x[j][0];
|
delx = xtmp - x[j][0];
|
||||||
@ -378,8 +379,9 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, in
|
|||||||
|
|
||||||
fpair = a0[itype][jtype]*wd;
|
fpair = a0[itype][jtype]*wd;
|
||||||
fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
|
fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
|
||||||
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
|
fpair *= factor_dpd;
|
||||||
fpair *= factor_dpd * rinv;
|
fpair += factor_sqrt*sigma[itype][jtype]*wd*randnum*dtinvsqrt;
|
||||||
|
fpair *= rinv;
|
||||||
|
|
||||||
f[i][0] += delx * fpair;
|
f[i][0] += delx * fpair;
|
||||||
f[i][1] += dely * fpair;
|
f[i][1] += dely * fpair;
|
||||||
|
|||||||
@ -329,7 +329,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
|
|||||||
int i, j, ii, jj, jnum, itype, jtype;
|
int i, j, ii, jj, jnum, itype, jtype;
|
||||||
double xtmp, ytmp, ztmp, delx, dely, delz, fpair;
|
double xtmp, ytmp, ztmp, delx, dely, delz, fpair;
|
||||||
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
||||||
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
|
double rsq, r, rinv, dot, wd, randnum, factor_dpd, factor_sqrt;
|
||||||
int *jlist;
|
int *jlist;
|
||||||
tagint itag, jtag;
|
tagint itag, jtag;
|
||||||
|
|
||||||
@ -360,6 +360,7 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
|
|||||||
for (jj = 0; jj < jnum; jj++) {
|
for (jj = 0; jj < jnum; jj++) {
|
||||||
j = jlist[jj];
|
j = jlist[jj];
|
||||||
factor_dpd = special_lj[sbmask(j)];
|
factor_dpd = special_lj[sbmask(j)];
|
||||||
|
factor_sqrt = special_sqrt[sbmask(j)];
|
||||||
j &= NEIGHMASK;
|
j &= NEIGHMASK;
|
||||||
|
|
||||||
delx = xtmp - x[j][0];
|
delx = xtmp - x[j][0];
|
||||||
@ -392,9 +393,9 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* v
|
|||||||
// drag force = -gamma * wd^2 * (delx dot delv) / r
|
// drag force = -gamma * wd^2 * (delx dot delv) / r
|
||||||
// random force = sigma * wd * rnd * dtinvsqrt;
|
// random force = sigma * wd * rnd * dtinvsqrt;
|
||||||
|
|
||||||
fpair = -gamma[itype][jtype] * wd * wd * dot * rinv;
|
fpair = -factor_dpd * gamma[itype][jtype] * wd * wd * dot * rinv;
|
||||||
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
|
fpair += factor_sqrt * sigma[itype][jtype] * wd * randnum * dtinvsqrt;
|
||||||
fpair *= factor_dpd * rinv;
|
fpair *= rinv;
|
||||||
|
|
||||||
f[i][0] += delx * fpair;
|
f[i][0] += delx * fpair;
|
||||||
f[i][1] += dely * fpair;
|
f[i][1] += dely * fpair;
|
||||||
|
|||||||
Reference in New Issue
Block a user