port DPD exclusions corrections to GPU package

This commit is contained in:
Axel Kohlmeyer
2023-01-02 12:04:10 -05:00
parent 37b3ba827f
commit 396d577f40
5 changed files with 43 additions and 20 deletions

View File

@@ -165,6 +165,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict coeff,
const int lj_types,
const __global numtyp *restrict sp_lj,
const __global numtyp *restrict sp_sqrt,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@@ -200,11 +201,12 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w;
numtyp factor_dpd;
numtyp factor_dpd, factor_sqrt;
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
factor_dpd = sp_lj[sbmask(j)];
factor_sqrt = sp_sqrt[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
@@ -245,8 +247,9 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
numtyp force = (numtyp)0.0;
if (!tstat_only) force = coeff[mtype].x*wd;
force -= coeff[mtype].y*wd*wd*dot*rinv;
force += coeff[mtype].z*wd*randnum*dtinvsqrt;
force*=factor_dpd*rinv;
force *= factor_dpd;
force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
force*=rinv;
f.x+=delx*force;
f.y+=dely*force;
@@ -278,6 +281,7 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict coeff_in,
const __global numtyp *restrict sp_lj_in,
const __global numtyp *restrict sp_sqrt_in,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@@ -295,8 +299,10 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
#ifndef ONETYPE
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
__local numtyp sp_lj[4];
__local numtyp sp_sqrt[4];
if (tid<4)
sp_lj[tid]=sp_lj_in[tid];
sp_sqrt[tid]=sp_sqrt_in[tid];
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
coeff[tid]=coeff_in[tid];
}
@@ -333,12 +339,15 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w;
numtyp factor_dpd;
#ifndef ONETYPE
numtyp factor_dpd, factor_sqrt;
#endif
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
#ifndef ONETYPE
factor_dpd = sp_lj[sbmask(j)];
factor_sqrt = sp_sqrt[sbmask(j)];
j &= NEIGHMASK;
#endif
@@ -390,12 +399,13 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
numtyp force = (numtyp)0.0;
if (!tstat_only) force = coeffx*wd;
force -= coeffy*wd*wd*dot*rinv;
force += coeffz*wd*randnum*dtinvsqrt;
#ifndef ONETYPE
force*=factor_dpd*rinv;
force *= factor_dpd;
force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt;
#else
force*=rinv;
force += coeffz*wd*randnum*dtinvsqrt;
#endif
force*=rinv;
f.x+=delx*force;
f.y+=dely*force;