Updated sph_lj kernels
This commit is contained in:
@ -29,27 +29,72 @@ _texture_2d( vel_tex,int4);
|
||||
|
||||
#if (SHUFFLE_AVAIL == 0)
|
||||
|
||||
#define store_drhoE(drhoI, deltaE, ii, inum, tid, t_per_atom, offset, \
|
||||
drhoE) \
|
||||
#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \
|
||||
if (t_per_atom>1) { \
|
||||
simdsync(); \
|
||||
simd_reduce_add2(t_per_atom, red_acc, offset, tid, rhoEi, deltaE); \
|
||||
simd_reduce_add2(t_per_atom, red_acc, offset, tid, \
|
||||
drhoEacc.x, drhoEacc.y); \
|
||||
} \
|
||||
if (offset==0 && ii<inum) { \
|
||||
drhoE[ii].x=drhoI; \
|
||||
drhoE[ii].y=deltaE; \
|
||||
drhoE[ii]=drhoEacc; \
|
||||
}
|
||||
#else
|
||||
#define store_drhoE(drhoI, deltaE, ii, inum, tid, t_per_atom, offset, drhoE) \
|
||||
#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \
|
||||
if (t_per_atom>1) { \
|
||||
simd_reduce_add2(t_per_atom,drhoI,deltaE); \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \
|
||||
drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
if (offset==0 && ii<inum) { \
|
||||
drhoE[ii].x=drhoI; \
|
||||
drhoE[ii].y=deltaE; \
|
||||
drhoE[ii].x=drhoEacc; \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* Lennard-Jones EOS,
|
||||
Francis H. Ree
|
||||
"Analytic representation of thermodynamic data for the Lennard‐Jones fluid",
|
||||
Journal of Chemical Physics 73 pp. 5401-5403 (1980)
|
||||
return p = pc[0], c = pc[1]
|
||||
*/
|
||||
|
||||
void LJEOS2(const numtyp rho, const numtyp e, const numtyp cv, numtyp pc[2])
|
||||
{
|
||||
numtyp T = e/cv;
|
||||
numtyp beta = ucl_recip(T); // (numtyp)1.0 / T;
|
||||
numtyp beta_sqrt = ucl_sqrt(beta);
|
||||
numtyp x = rho * ucl_sqrt(beta_sqrt);
|
||||
|
||||
numtyp xsq = x * x;
|
||||
numtyp xpow3 = xsq * x;
|
||||
numtyp xpow4 = xsq * xsq;
|
||||
|
||||
/* differential of Helmholtz free energy w.r.t. x */
|
||||
numtyp diff_A_NkT = (numtyp)3.629 + (numtyp)7.264*x -
|
||||
beta*((numtyp)3.492 - (numtyp)18.698*x + (numtyp)35.505*xsq - (numtyp)31.816*xpow3 +
|
||||
(numtyp)11.195*xpow4) - beta_sqrt*((numtyp)5.369 + (numtyp)13.16*x +
|
||||
(numtyp)18.525*xsq - (numtyp)17.076*xpow3 + (numtyp)9.32*xpow4) +
|
||||
(numtyp)10.4925*xsq + (numtyp)11.46*xpow3 + (numtyp)2.176*xpow4*xpow4*x;
|
||||
|
||||
/* differential of Helmholtz free energy w.r.t. x^2 */
|
||||
numtyp d2A_dx2 = (numtyp)7.264 + (numtyp)20.985*x +
|
||||
beta*((numtyp)18.698 - (numtyp)71.01*x + (numtyp)95.448*xsq - (numtyp)44.78*xpow3) -
|
||||
beta_sqrt*((numtyp)13.16 + (numtyp)37.05*x - (numtyp)51.228*xsq + (numtyp)37.28*xpow3) +
|
||||
(numtyp)34.38*xsq + (numtyp)19.584*xpow4*xpow4;
|
||||
|
||||
// p = rho k T * (1 + rho * d(A/(NkT))/drho)
|
||||
// dx/drho = rho/x
|
||||
pc[0] = rho * T * ((numtyp)1.0 + diff_A_NkT * x); // pressure
|
||||
numtyp csq = T * ((numtyp)1.0 + (numtyp)2.0 * diff_A_NkT * x + d2A_dx2 * x * x); // soundspeed squared
|
||||
if (csq > (numtyp)0.0) {
|
||||
pc[1] = ucl_sqrt(csq); // soundspeed
|
||||
} else {
|
||||
pc[1] = (numtyp)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void k_sph_lj(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict extra,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
@ -87,7 +132,6 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
int itype=ix.w;
|
||||
numtyp mass_itype = mass[itype];
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
|
||||
const numtyp4 extrai = extra[i];
|
||||
@ -97,11 +141,12 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
|
||||
numtyp massi= extrai.w;
|
||||
|
||||
// compute pressure of particle i with LJ EOS
|
||||
numtyp fi, ci;
|
||||
//LJEOS2(rho[i], esph[i], cv[i], &fi, &ci);
|
||||
//fi /= (rho[i] * rho[i]);
|
||||
numtyp fci[2];
|
||||
LJEOS2(rhoi, esphi, cvi, fci);
|
||||
numtyp fi = fci[0];
|
||||
numtyp ci = fci[1];
|
||||
fi /= (rhoi * rhoi);
|
||||
|
||||
numtyp factor_lj;
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||
|
||||
@ -132,21 +177,23 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
|
||||
numtyp massj= extraj.w;
|
||||
|
||||
numtyp h = coeffy; // cut[itype][jtype]
|
||||
ih = (numtyp) 1.0 / h;
|
||||
ih = ucl_recip(h); // (numtyp)1.0 / h;
|
||||
numtyp ihsq = ih * ih;
|
||||
numtyp ihcub = ihsq * ih;
|
||||
|
||||
numtyp wfd = h - ucl_sqrt(rsq);
|
||||
// domain->dimension == 3 Lucy Kernel, 3d
|
||||
wfd = -25.066903536973515383e0 * wfd * wfd * ihsq * ihsq * ihsq * ih;
|
||||
wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
|
||||
|
||||
// Lucy Kernel, 2d
|
||||
//wfd = -19.098593171027440292e0 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||
|
||||
// function call to LJ EOS
|
||||
numtyp fj, cj;
|
||||
//LJEOS2(rho[j], esph[j], cv[j], &fj, &cj);
|
||||
//fj /= (rho[j] * rho[j]);
|
||||
numtyp fcj[2];
|
||||
LJEOS2(rhoj, esphj, cvj, fcj);
|
||||
numtyp fj = fcj[0];
|
||||
numtyp cj = fcj[1];
|
||||
fj /= (rhoj * rhoj);
|
||||
|
||||
// apply long-range correction to model a LJ fluid with cutoff
|
||||
// this implies that the modelled LJ fluid has cutoff == SPH cutoff
|
||||
@ -163,7 +210,7 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
|
||||
// artificial viscosity (Monaghan 1992)
|
||||
numtyp fvisc = (numtyp)0;
|
||||
if (delVdotDelR < (numyp)0) {
|
||||
mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
|
||||
numtyp mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
|
||||
fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
|
||||
}
|
||||
|
||||
@ -229,7 +276,8 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
||||
__syncthreads();
|
||||
#else
|
||||
const numtyp coeffx=coeff_in[ONETYPE].x; // viscosity[itype][jtype]
|
||||
const numtyp coeffy=coeff_in[ONETYPE].y; // cutsq[itype][jtype]
|
||||
const numtyp coeffy=coeff_in[ONETYPE].y; // cut[itype][jtype]
|
||||
const numtyp cutsq_p=coeff_in[ONETYPE].z; // cutsq[itype][jtype]
|
||||
#endif
|
||||
|
||||
int n_stride;
|
||||
@ -242,7 +290,8 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
||||
energy=(acctyp)0;
|
||||
for (int i=0; i<6; i++) virial[i]=(acctyp)0;
|
||||
}
|
||||
acctyp Qi = (acctyp)0;
|
||||
acctyp2 drhoEacc;
|
||||
drhoEacc.x = drhoEacc.x = (acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor, nbor_end;
|
||||
@ -258,13 +307,19 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
int itag=iv.w;
|
||||
|
||||
const numtyp4 Tcvi = extra[i];
|
||||
numtyp Ti = Tcvi.x;
|
||||
numtyp cvi = Tcvi.y;
|
||||
const numtyp4 extrai = extra[i];
|
||||
numtyp rhoi = extrai.x;
|
||||
numtyp esphi = extrai.y;
|
||||
numtyp cvi = extrai.z;
|
||||
numtyp massi= extrai.w;
|
||||
|
||||
// compute pressure of particle i with LJ EOS
|
||||
numtyp fci[2];
|
||||
LJEOS2(rhoi, esphi, cvi, fci);
|
||||
numtyp fi = fci[0];
|
||||
numtyp ci = fci[1];
|
||||
fi /= (rhoi * rhoi);
|
||||
|
||||
#ifndef ONETYPE
|
||||
numtyp factor_dpd;
|
||||
#endif
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||
|
||||
@ -289,103 +344,65 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
if (rsq<cutsq_p) {
|
||||
numtyp r=ucl_sqrt(rsq);
|
||||
if (r < EPSILON) continue;
|
||||
#ifndef ONETYPE
|
||||
const numtyp coeffx=coeff[mtype].x; // viscosity[itype][jtype]
|
||||
const numtyp coeffy=coeff[mtype].y; // cut[itype][jtype]
|
||||
#endif
|
||||
const numtyp4 extraj = extra[j];
|
||||
numtyp rhoj = extraj.x;
|
||||
numtyp esphj = extraj.y;
|
||||
numtyp cvj = extraj.z;
|
||||
numtyp massj= extraj.w;
|
||||
|
||||
numtyp rinv=ucl_recip(r);
|
||||
numtyp h = coeffy; // cut[itype][jtype]
|
||||
ih = ih = ucl_recip(h); // (numtyp)1.0 / h;
|
||||
numtyp ihsq = ih * ih;
|
||||
numtyp ihcub = ihsq * ih;
|
||||
|
||||
numtyp wfd = h - ucl_sqrt(rsq);
|
||||
// domain->dimension == 3 Lucy Kernel, 3d
|
||||
wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
|
||||
|
||||
// Lucy Kernel, 2d
|
||||
//wfd = -19.098593171027440292e0 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||
|
||||
// function call to LJ EOS
|
||||
numtyp fcj[2];
|
||||
LJEOS2(rhoj, esphj, cvj, fcj);
|
||||
numtyp fj = fcj[0];
|
||||
numtyp cj = fcj[1];
|
||||
fj /= (rhoj * rhoj);
|
||||
|
||||
// apply long-range correction to model a LJ fluid with cutoff
|
||||
// this implies that the modelled LJ fluid has cutoff == SPH cutoff
|
||||
numtyp lrc = (numtyp)-11.1701 * (ihcub * ihcub * ihcub - (numtyp)1.5 * ihcub);
|
||||
fi += lrc;
|
||||
fj += lrc;
|
||||
|
||||
// dot product of velocity delta and distance vector
|
||||
numtyp delvx = iv.x - jv.x;
|
||||
numtyp delvy = iv.y - jv.y;
|
||||
numtyp delvz = iv.z - jv.z;
|
||||
numtyp dot = delx*delvx + dely*delvy + delz*delvz;
|
||||
numtyp vijeij = dot*rinv;
|
||||
numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
|
||||
|
||||
#ifndef ONETYPE
|
||||
const numtyp coeffx=coeff[mtype].x; // a0[itype][jtype]
|
||||
const numtyp coeffy=coeff[mtype].y; // gamma[itype][jtype]
|
||||
#endif
|
||||
|
||||
const numtyp4 Tcvj = extra[j];
|
||||
numtyp Tj = Tcvj.x;
|
||||
numtyp cvj = Tcvj.y;
|
||||
|
||||
unsigned int tag1=itag, tag2=jtag;
|
||||
if (tag1 > tag2) {
|
||||
tag1 = jtag; tag2 = itag;
|
||||
}
|
||||
numtyp randnum = (numtyp)0.0;
|
||||
saru(tag1, tag2, seed, timestep, randnum);
|
||||
|
||||
numtyp T_ij=(numtyp)0.5*(Ti+Tj);
|
||||
numtyp4 T_pow;
|
||||
T_pow.x = T_ij - (numtyp)1.0;
|
||||
T_pow.y = T_pow.x*T_pow.x;
|
||||
T_pow.z = T_pow.x*T_pow.y;
|
||||
T_pow.w = T_pow.x*T_pow.z;
|
||||
|
||||
numtyp power_d = coeff2x; // power[itype][jtype]
|
||||
if (power_flag) {
|
||||
numtyp factor = (numtyp)1.0;
|
||||
factor += scx*T_pow.x + scy*T_pow.y + scz*T_pow.z + scw*T_pow.w;
|
||||
power_d *= factor;
|
||||
// artificial viscosity (Monaghan 1992)
|
||||
numtyp fvisc = (numtyp)0;
|
||||
if (delVdotDelR < (numyp)0) {
|
||||
numtyp mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
|
||||
fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
|
||||
}
|
||||
|
||||
power_d = MAX((numtyp)0.01,power_d);
|
||||
numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype]
|
||||
wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc));
|
||||
numtyp wr = ucl_pow((numtyp)wc, (numtyp)0.5*power_d);
|
||||
|
||||
numtyp kboltz = (numtyp)1.0;
|
||||
numtyp GammaIJ = coeffy; // gamma[itype][jtype]
|
||||
numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj);
|
||||
SigmaIJ = ucl_sqrt(SigmaIJ);
|
||||
|
||||
numtyp force = coeffx*T_ij*wc; // a0[itype][jtype]
|
||||
force -= GammaIJ *wr*wr *dot*rinv;
|
||||
force += SigmaIJ* wr *randnum * dtinvsqrt;
|
||||
#ifndef ONETYPE
|
||||
force *= factor_dpd*rinv;
|
||||
#else
|
||||
force *= rinv;
|
||||
#endif
|
||||
// total pair force & thermal energy increment
|
||||
numtyp force = -massi * massj * (fi + fj + fvisc) * wfd;
|
||||
numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
f.z+=delz*force;
|
||||
|
||||
// heat transfer
|
||||
|
||||
if (r < coeff2w) {
|
||||
numtyp wrT = (numtyp)1.0 - r/coeff2w;
|
||||
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
|
||||
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
|
||||
numtyp randnumT = (numtyp)0;
|
||||
saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian();
|
||||
randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0));
|
||||
|
||||
numtyp kappaT = coeff2y; // kappa[itype][jtype]
|
||||
if (kappa_flag) {
|
||||
numtyp factor = (numtyp)1.0;
|
||||
factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w;
|
||||
kappaT *= factor;
|
||||
}
|
||||
|
||||
numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
|
||||
numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
|
||||
|
||||
numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj);
|
||||
numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
|
||||
dQd /= (cvi+cvj);
|
||||
numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT;
|
||||
Qi += (dQc + dQd + dQr );
|
||||
}
|
||||
|
||||
if (EVFLAG && eflag) {
|
||||
numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc;
|
||||
#ifndef ONETYPE
|
||||
energy+=factor_dpd*e;
|
||||
#else
|
||||
numtyp e = (numtyp)0;
|
||||
energy+=e;
|
||||
#endif
|
||||
}
|
||||
if (EVFLAG && vflag) {
|
||||
virial[0] += delx*delx*force;
|
||||
@ -401,6 +418,6 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
||||
} // if ii
|
||||
|
||||
store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv);
|
||||
store_drhoE(Qi,ii,inum,tid,t_per_atom,offset,Q);
|
||||
store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user