Updated sph_lj kernels

This commit is contained in:
Trung Nguyen
2023-12-07 11:41:37 -06:00
parent 379d3c8e20
commit fef28c9daa

View File

@ -29,27 +29,72 @@ _texture_2d( vel_tex,int4);
#if (SHUFFLE_AVAIL == 0)
#define store_drhoE(drhoI, deltaE, ii, inum, tid, t_per_atom, offset, \
drhoE) \
#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \
if (t_per_atom>1) { \
simdsync(); \
simd_reduce_add2(t_per_atom, red_acc, offset, tid, rhoEi, deltaE); \
simd_reduce_add2(t_per_atom, red_acc, offset, tid, \
drhoEacc.x, drhoEacc.y); \
} \
if (offset==0 && ii<inum) { \
drhoE[ii].x=drhoI; \
drhoE[ii].y=deltaE; \
drhoE[ii]=drhoEacc; \
}
#else
#define store_drhoE(drhoI, deltaE, ii, inum, tid, t_per_atom, offset, drhoE) \
#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \
if (t_per_atom>1) { \
simd_reduce_add2(t_per_atom,drhoI,deltaE); \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \
drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom); \
} \
} \
if (offset==0 && ii<inum) { \
drhoE[ii].x=drhoI; \
drhoE[ii].y=deltaE; \
drhoE[ii].x=drhoEacc; \
}
#endif
/* ------------------------------------------------------------------------ */
/* Lennard-Jones EOS,
Francis H. Ree
"Analytic representation of thermodynamic data for the LennardJones fluid",
Journal of Chemical Physics 73 pp. 5401-5403 (1980)
return p = pc[0], c = pc[1]
*/
void LJEOS2(const numtyp rho, const numtyp e, const numtyp cv, numtyp pc[2])
{
numtyp T = e/cv;
numtyp beta = ucl_recip(T); // (numtyp)1.0 / T;
numtyp beta_sqrt = ucl_sqrt(beta);
numtyp x = rho * ucl_sqrt(beta_sqrt);
numtyp xsq = x * x;
numtyp xpow3 = xsq * x;
numtyp xpow4 = xsq * xsq;
/* differential of Helmholtz free energy w.r.t. x */
numtyp diff_A_NkT = (numtyp)3.629 + (numtyp)7.264*x -
beta*((numtyp)3.492 - (numtyp)18.698*x + (numtyp)35.505*xsq - (numtyp)31.816*xpow3 +
(numtyp)11.195*xpow4) - beta_sqrt*((numtyp)5.369 + (numtyp)13.16*x +
(numtyp)18.525*xsq - (numtyp)17.076*xpow3 + (numtyp)9.32*xpow4) +
(numtyp)10.4925*xsq + (numtyp)11.46*xpow3 + (numtyp)2.176*xpow4*xpow4*x;
/* differential of Helmholtz free energy w.r.t. x^2 */
numtyp d2A_dx2 = (numtyp)7.264 + (numtyp)20.985*x +
beta*((numtyp)18.698 - (numtyp)71.01*x + (numtyp)95.448*xsq - (numtyp)44.78*xpow3) -
beta_sqrt*((numtyp)13.16 + (numtyp)37.05*x - (numtyp)51.228*xsq + (numtyp)37.28*xpow3) +
(numtyp)34.38*xsq + (numtyp)19.584*xpow4*xpow4;
// p = rho k T * (1 + rho * d(A/(NkT))/drho)
// dx/drho = rho/x
pc[0] = rho * T * ((numtyp)1.0 + diff_A_NkT * x); // pressure
numtyp csq = T * ((numtyp)1.0 + (numtyp)2.0 * diff_A_NkT * x + d2A_dx2 * x * x); // soundspeed squared
if (csq > (numtyp)0.0) {
pc[1] = ucl_sqrt(csq); // soundspeed
} else {
pc[1] = (numtyp)0.0;
}
}
__kernel void k_sph_lj(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict extra,
const __global numtyp4 *restrict coeff,
@ -87,7 +132,6 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
int itype=ix.w;
numtyp mass_itype = mass[itype];
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
const numtyp4 extrai = extra[i];
@ -97,11 +141,12 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
numtyp massi= extrai.w;
// compute pressure of particle i with LJ EOS
numtyp fi, ci;
//LJEOS2(rho[i], esph[i], cv[i], &fi, &ci);
//fi /= (rho[i] * rho[i]);
numtyp fci[2];
LJEOS2(rhoi, esphi, cvi, fci);
numtyp fi = fci[0];
numtyp ci = fci[1];
fi /= (rhoi * rhoi);
numtyp factor_lj;
for ( ; nbor<nbor_end; nbor+=n_stride) {
ucl_prefetch(dev_packed+nbor+n_stride);
@ -132,21 +177,23 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
numtyp massj= extraj.w;
numtyp h = coeffy; // cut[itype][jtype]
ih = (numtyp) 1.0 / h;
ih = ucl_recip(h); // (numtyp)1.0 / h;
numtyp ihsq = ih * ih;
numtyp ihcub = ihsq * ih;
numtyp wfd = h - ucl_sqrt(rsq);
// domain->dimension == 3 Lucy Kernel, 3d
wfd = -25.066903536973515383e0 * wfd * wfd * ihsq * ihsq * ihsq * ih;
wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
// Lucy Kernel, 2d
//wfd = -19.098593171027440292e0 * wfd * wfd * ihsq * ihsq * ihsq;
// function call to LJ EOS
numtyp fj, cj;
//LJEOS2(rho[j], esph[j], cv[j], &fj, &cj);
//fj /= (rho[j] * rho[j]);
numtyp fcj[2];
LJEOS2(rhoj, esphj, cvj, fcj);
numtyp fj = fcj[0];
numtyp cj = fcj[1];
fj /= (rhoj * rhoj);
// apply long-range correction to model a LJ fluid with cutoff
// this implies that the modelled LJ fluid has cutoff == SPH cutoff
@ -163,7 +210,7 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
// artificial viscosity (Monaghan 1992)
numtyp fvisc = (numtyp)0;
if (delVdotDelR < (numyp)0) {
mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
numtyp mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
}
@ -229,7 +276,8 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
__syncthreads();
#else
const numtyp coeffx=coeff_in[ONETYPE].x; // viscosity[itype][jtype]
const numtyp coeffy=coeff_in[ONETYPE].y; // cutsq[itype][jtype]
const numtyp coeffy=coeff_in[ONETYPE].y; // cut[itype][jtype]
const numtyp cutsq_p=coeff_in[ONETYPE].z; // cutsq[itype][jtype]
#endif
int n_stride;
@ -242,7 +290,8 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
energy=(acctyp)0;
for (int i=0; i<6; i++) virial[i]=(acctyp)0;
}
acctyp Qi = (acctyp)0;
acctyp2 drhoEacc;
drhoEacc.x = drhoEacc.x = (acctyp)0;
if (ii<inum) {
int i, numj, nbor, nbor_end;
@ -258,13 +307,19 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w;
const numtyp4 Tcvi = extra[i];
numtyp Ti = Tcvi.x;
numtyp cvi = Tcvi.y;
const numtyp4 extrai = extra[i];
numtyp rhoi = extrai.x;
numtyp esphi = extrai.y;
numtyp cvi = extrai.z;
numtyp massi= extrai.w;
// compute pressure of particle i with LJ EOS
numtyp fci[2];
LJEOS2(rhoi, esphi, cvi, fci);
numtyp fi = fci[0];
numtyp ci = fci[1];
fi /= (rhoi * rhoi);
#ifndef ONETYPE
numtyp factor_dpd;
#endif
for ( ; nbor<nbor_end; nbor+=n_stride) {
ucl_prefetch(dev_packed+nbor+n_stride);
@ -289,103 +344,65 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
numtyp rsq = delx*delx+dely*dely+delz*delz;
if (rsq<cutsq_p) {
numtyp r=ucl_sqrt(rsq);
if (r < EPSILON) continue;
#ifndef ONETYPE
const numtyp coeffx=coeff[mtype].x; // viscosity[itype][jtype]
const numtyp coeffy=coeff[mtype].y; // cut[itype][jtype]
#endif
const numtyp4 extraj = extra[j];
numtyp rhoj = extraj.x;
numtyp esphj = extraj.y;
numtyp cvj = extraj.z;
numtyp massj= extraj.w;
numtyp rinv=ucl_recip(r);
numtyp h = coeffy; // cut[itype][jtype]
ih = ih = ucl_recip(h); // (numtyp)1.0 / h;
numtyp ihsq = ih * ih;
numtyp ihcub = ihsq * ih;
numtyp wfd = h - ucl_sqrt(rsq);
// domain->dimension == 3 Lucy Kernel, 3d
wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
// Lucy Kernel, 2d
//wfd = -19.098593171027440292e0 * wfd * wfd * ihsq * ihsq * ihsq;
// function call to LJ EOS
numtyp fcj[2];
LJEOS2(rhoj, esphj, cvj, fcj);
numtyp fj = fcj[0];
numtyp cj = fcj[1];
fj /= (rhoj * rhoj);
// apply long-range correction to model a LJ fluid with cutoff
// this implies that the modelled LJ fluid has cutoff == SPH cutoff
numtyp lrc = (numtyp)-11.1701 * (ihcub * ihcub * ihcub - (numtyp)1.5 * ihcub);
fi += lrc;
fj += lrc;
// dot product of velocity delta and distance vector
numtyp delvx = iv.x - jv.x;
numtyp delvy = iv.y - jv.y;
numtyp delvz = iv.z - jv.z;
numtyp dot = delx*delvx + dely*delvy + delz*delvz;
numtyp vijeij = dot*rinv;
numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
#ifndef ONETYPE
const numtyp coeffx=coeff[mtype].x; // a0[itype][jtype]
const numtyp coeffy=coeff[mtype].y; // gamma[itype][jtype]
#endif
const numtyp4 Tcvj = extra[j];
numtyp Tj = Tcvj.x;
numtyp cvj = Tcvj.y;
unsigned int tag1=itag, tag2=jtag;
if (tag1 > tag2) {
tag1 = jtag; tag2 = itag;
}
numtyp randnum = (numtyp)0.0;
saru(tag1, tag2, seed, timestep, randnum);
numtyp T_ij=(numtyp)0.5*(Ti+Tj);
numtyp4 T_pow;
T_pow.x = T_ij - (numtyp)1.0;
T_pow.y = T_pow.x*T_pow.x;
T_pow.z = T_pow.x*T_pow.y;
T_pow.w = T_pow.x*T_pow.z;
numtyp power_d = coeff2x; // power[itype][jtype]
if (power_flag) {
numtyp factor = (numtyp)1.0;
factor += scx*T_pow.x + scy*T_pow.y + scz*T_pow.z + scw*T_pow.w;
power_d *= factor;
// artificial viscosity (Monaghan 1992)
numtyp fvisc = (numtyp)0;
if (delVdotDelR < (numyp)0) {
numtyp mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h);
fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
}
power_d = MAX((numtyp)0.01,power_d);
numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype]
wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc));
numtyp wr = ucl_pow((numtyp)wc, (numtyp)0.5*power_d);
numtyp kboltz = (numtyp)1.0;
numtyp GammaIJ = coeffy; // gamma[itype][jtype]
numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj);
SigmaIJ = ucl_sqrt(SigmaIJ);
numtyp force = coeffx*T_ij*wc; // a0[itype][jtype]
force -= GammaIJ *wr*wr *dot*rinv;
force += SigmaIJ* wr *randnum * dtinvsqrt;
#ifndef ONETYPE
force *= factor_dpd*rinv;
#else
force *= rinv;
#endif
// total pair force & thermal energy increment
numtyp force = -massi * massj * (fi + fj + fvisc) * wfd;
numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
f.x+=delx*force;
f.y+=dely*force;
f.z+=delz*force;
// heat transfer
if (r < coeff2w) {
numtyp wrT = (numtyp)1.0 - r/coeff2w;
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
numtyp randnumT = (numtyp)0;
saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian();
randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0));
numtyp kappaT = coeff2y; // kappa[itype][jtype]
if (kappa_flag) {
numtyp factor = (numtyp)1.0;
factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w;
kappaT *= factor;
}
numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj);
numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
dQd /= (cvi+cvj);
numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT;
Qi += (dQc + dQd + dQr );
}
if (EVFLAG && eflag) {
numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc;
#ifndef ONETYPE
energy+=factor_dpd*e;
#else
numtyp e = (numtyp)0;
energy+=e;
#endif
}
if (EVFLAG && vflag) {
virial[0] += delx*delx*force;
@ -401,6 +418,6 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
} // if ii
store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv);
store_drhoE(Qi,ii,inum,tid,t_per_atom,offset,Q);
store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
}