port DPD exclusions corrections to GPU package
This commit is contained in:
@ -165,6 +165,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj,
|
||||
const __global numtyp *restrict sp_sqrt,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
@ -200,11 +201,12 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
int itag=iv.w;
|
||||
|
||||
numtyp factor_dpd;
|
||||
numtyp factor_dpd, factor_sqrt;
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
int j=dev_packed[nbor];
|
||||
factor_dpd = sp_lj[sbmask(j)];
|
||||
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
@ -245,8 +247,9 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
||||
numtyp force = (numtyp)0.0;
|
||||
if (!tstat_only) force = coeff[mtype].x*wd;
|
||||
force -= coeff[mtype].y*wd*wd*dot*rinv;
|
||||
force += coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||
force*=factor_dpd*rinv;
|
||||
force *= factor_dpd;
|
||||
force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||
force*=rinv;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
@ -278,6 +281,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global numtyp *restrict sp_sqrt_in,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
@ -295,8 +299,10 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
||||
#ifndef ONETYPE
|
||||
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
__local numtyp sp_lj[4];
|
||||
__local numtyp sp_sqrt[4];
|
||||
if (tid<4)
|
||||
sp_lj[tid]=sp_lj_in[tid];
|
||||
sp_sqrt[tid]=sp_sqrt_in[tid];
|
||||
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
|
||||
coeff[tid]=coeff_in[tid];
|
||||
}
|
||||
@ -333,12 +339,15 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
int itag=iv.w;
|
||||
|
||||
numtyp factor_dpd;
|
||||
#ifndef ONETYPE
|
||||
numtyp factor_dpd, factor_sqrt;
|
||||
#endif
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
int j=dev_packed[nbor];
|
||||
#ifndef ONETYPE
|
||||
factor_dpd = sp_lj[sbmask(j)];
|
||||
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
#endif
|
||||
|
||||
@ -390,12 +399,13 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp force = (numtyp)0.0;
|
||||
if (!tstat_only) force = coeffx*wd;
|
||||
force -= coeffy*wd*wd*dot*rinv;
|
||||
force += coeffz*wd*randnum*dtinvsqrt;
|
||||
#ifndef ONETYPE
|
||||
force*=factor_dpd*rinv;
|
||||
force *= factor_dpd;
|
||||
force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt;
|
||||
#else
|
||||
force*=rinv;
|
||||
force += coeffz*wd*randnum*dtinvsqrt;
|
||||
#endif
|
||||
force*=rinv;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
|
||||
Reference in New Issue
Block a user