From 379d3c8e2073af5b4394eaf127fb9d6b18c5ba36 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 7 Dec 2023 11:06:17 -0600 Subject: [PATCH] Updated host_esph to extra data and cut to coeff --- lib/gpu/lal_sph_lj.cpp | 16 ++-- lib/gpu/lal_sph_lj.cu | 156 ++++++++++++++++--------------------- lib/gpu/lal_sph_lj.h | 10 ++- lib/gpu/lal_sph_lj_ext.cpp | 20 ++--- 4 files changed, 91 insertions(+), 111 deletions(-) diff --git a/lib/gpu/lal_sph_lj.cpp b/lib/gpu/lal_sph_lj.cpp index 8367a81b1e..3abef71cfb 100644 --- a/lib/gpu/lal_sph_lj.cpp +++ b/lib/gpu/lal_sph_lj.cpp @@ -45,8 +45,8 @@ int SPHLJT::bytes_per_atom(const int max_nbors) const { template int SPHLJT::init(const int ntypes, - double **host_cutsq, double **host_viscosity, - double *host_special_lj, + double **host_cutsq, double **host_cut, + double **host_viscosity, double *host_special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, @@ -92,8 +92,8 @@ int SPHLJT::init(const int ntypes, host_write[i]=0.0; coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack2(ntypes,lj_types,coeff,host_write,host_viscosity, - host_cutsq); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_viscosity, + host_cut, host_cutsq); UCL_H_Vec dview; sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); @@ -163,9 +163,9 @@ int SPHLJT::loop(const int eflag, const int vflag) { int idx = n+i*nstride; numtyp4 v; v.x = rho[i]; + v.x = esph[i]; v.y = cv[i]; - v.z = mass[i]; - v.w = 0; + v.w = mass[i]; pextra[idx] = v; } this->atom->add_extra_data(); @@ -202,8 +202,10 @@ int SPHLJT::loop(const int eflag, const int vflag) { // --------------------------------------------------------------------------- template -void SPHLJT::get_extra_data(double *host_rho, double *host_cv, double* host_mass) { +void SPHLJT::get_extra_data(double *host_rho, double *host_esph, + double *host_cv, double* host_mass) { rho = host_rho; + esph = host_esph; cv = host_cv; mass = host_mass; } diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu index c6fe071399..4d21c093a8 100644 --- a/lib/gpu/lal_sph_lj.cu +++ b/lib/gpu/lal_sph_lj.cu @@ -77,7 +77,8 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_, energy=(acctyp)0; for (int i=0; i<6; i++) virial[i]=(acctyp)0; } - acctyp Qi = (acctyp)0; + acctyp2 drhoEacc; + drhoEacc.x = drhoEacc.x = (acctyp)0; if (iidimension == 3 Lucy Kernel, 3d + wfd = -25.066903536973515383e0 * wfd * wfd * ihsq * ihsq * ihsq * ih; + + // Lucy Kernel, 2d + //wfd = -19.098593171027440292e0 * wfd * wfd * ihsq * ihsq * ihsq; + + // function call to LJ EOS + numtyp fj, cj; + //LJEOS2(rho[j], esph[j], cv[j], &fj, &cj); + //fj /= (rho[j] * rho[j]); + + // apply long-range correction to model a LJ fluid with cutoff + // this implies that the modelled LJ fluid has cutoff == SPH cutoff + numtyp lrc = (numtyp)-11.1701 * (ihcub * ihcub * ihcub - (numtyp)1.5 * ihcub); + fi += lrc; + fj += lrc; + + // dot product of velocity delta and distance vector numtyp delvx = iv.x - jv.x; numtyp delvy = iv.y - jv.y; numtyp delvz = iv.z - jv.z; - numtyp dot = delx*delvx + dely*delvy + delz*delvz; - numtyp vijeij = dot*rinv; + numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz; - const numtyp coeffx=coeff[mtype].x; // a0[itype][jtype] - const numtyp coeffy=coeff[mtype].y; // gamma[itype][jtype] - const numtyp coeffz=coeff[mtype].z; // cut[itype][jtype] - - const numtyp4 Tcvj = extra[j]; - numtyp Tj = Tcvj.x; - numtyp cvj = Tcvj.y; - - unsigned int tag1=itag, tag2=jtag; - if (tag1 > tag2) { - tag1 = jtag; tag2 = itag; + // artificial viscosity (Monaghan 1992) + numtyp fvisc = (numtyp)0; + if (delVdotDelR < (numyp)0) { + mu = h * delVdotDelR / (rsq + (numyp)0.01 * h * h); + fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype] } - numtyp randnum = (numtyp)0.0; - saru(tag1, tag2, seed, timestep, randnum); - - numtyp T_ij=(numtyp)0.5*(Ti+Tj); - numtyp4 T_pow; - T_pow.x = T_ij - (numtyp)1.0; - T_pow.y = T_pow.x*T_pow.x; - T_pow.z = T_pow.x*T_pow.y; - T_pow.w = T_pow.x*T_pow.z; - - numtyp coeff2x = coeff2[mtype].x; //power[itype][jtype] - numtyp coeff2y = coeff2[mtype].y; //kappa[itype][jtype] - numtyp coeff2z = coeff2[mtype].z; //powerT[itype][jtype] - numtyp coeff2w = coeff2[mtype].w; //cutT[itype][jtype] - numtyp power_d = coeff2x; - if (power_flag) { - numtyp factor = (numtyp)1.0; - factor += sc[mtype].x*T_pow.x + sc[mtype].y*T_pow.y + - sc[mtype].z*T_pow.z + sc[mtype].w*T_pow.w; - power_d *= factor; - } - - power_d = MAX((numtyp)0.01,power_d); - numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype] - wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc)); - numtyp wr = ucl_pow(wc, (numtyp)0.5*power_d); - - numtyp kboltz = (numtyp)1.0; - numtyp GammaIJ = coeffy; // gamma[itype][jtype] - numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj); - SigmaIJ = ucl_sqrt(SigmaIJ); - - numtyp force = coeffx*T_ij*wc; // a0[itype][jtype] - force -= GammaIJ *wr*wr *dot*rinv; - force += SigmaIJ * wr *randnum * dtinvsqrt; - force *= factor_dpd*rinv; + // total pair force & thermal energy increment + numtyp force = -massi * massj * (fi + fj + fvisc) * wfd; + numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; - // heat transfer - - if (r < coeff2w) { - numtyp wrT = (numtyp)1.0 - r/coeff2w; - wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); - wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] - numtyp randnumT = (numtyp)0; - saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian(); - randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0)); + // and change in density, drho[i] + drhoEacc.x += massj * delVdotDelR * wfd; - numtyp kappaT = coeff2y; // kappa[itype][jtype] - if (kappa_flag) { - numtyp factor = (numtyp)1.0; - factor += kc[mtype].x*T_pow.x + kc[mtype].y*T_pow.y + - kc[mtype].z*T_pow.z + kc[mtype].w*T_pow.w; - kappaT *= factor; - } - - numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; - numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); - - numtyp dQc = kij * wrT*wrT * (Tj - Ti)/(Ti*Tj); - numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; - dQd /= (cvi+cvj); - numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT; - Qi += (dQc + dQd + dQr ); - } + // change in thermal energy, desph[i] + drhoEacc.y += deltaE; if (EVFLAG && eflag) { - numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc; - energy+=factor_dpd*e; + numtyp e = (numtyp)0; + energy+=e; } if (EVFLAG && vflag) { virial[0] += delx*delx*force; @@ -224,12 +198,12 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_, } // if ii store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); - store_drhoE(Qi,ii,inum,tid,t_per_atom,offset,Q); + store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE); } __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict extra, - const __global numtyp2 *restrict coeff_in, + const __global numtyp4 *restrict coeff_in, const __global numtyp *restrict sp_lj_in, const __global int * dev_nbor, const __global int * dev_packed, diff --git a/lib/gpu/lal_sph_lj.h b/lib/gpu/lal_sph_lj.h index 5e26d019b2..04423f85fa 100644 --- a/lib/gpu/lal_sph_lj.h +++ b/lib/gpu/lal_sph_lj.h @@ -37,7 +37,8 @@ class SPHLJ : public BaseDPD { * - -3 if there is an out of memory error * - -4 if the GPU library was not compiled for GPU * - -5 Double precision is not supported on card **/ - int init(const int ntypes, double **host_cutsq, double **host_viscosity, + int init(const int ntypes, double **host_cutsq, + double** host_cut, double **host_viscosity, double *host_special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *screen); @@ -52,14 +53,15 @@ class SPHLJ : public BaseDPD { /// Total host memory used by library for pair style double host_memory_usage() const; - void get_extra_data(double *host_rho, double *host_cv, double* host_mass); + void get_extra_data(double *host_rho, double *host_esph, + double *host_cv, double* host_mass); /// copy drho and desph from device to host void update_drhoE(void **drhoE_ptr); // --------------------------- TYPE DATA -------------------------- - /// coeff.x = viscosity, coeff.y = cutsq + /// coeff.x = viscosity, coeff.y = cut, coeff.z = cutsq UCL_D_Vec coeff; /// Special LJ values @@ -76,7 +78,7 @@ class SPHLJ : public BaseDPD { int _max_drhoE_size; /// pointer to host data - double *rho, *cv, *mass; + double *rho, *esph, *cv, *mass; private: bool _allocated; diff --git a/lib/gpu/lal_sph_lj_ext.cpp b/lib/gpu/lal_sph_lj_ext.cpp index 7abaa10805..7db601c9eb 100644 --- a/lib/gpu/lal_sph_lj_ext.cpp +++ b/lib/gpu/lal_sph_lj_ext.cpp @@ -27,8 +27,9 @@ static SPHLJ SPHLJMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int sph_lj_gpu_init(const int ntypes, double **cutsq, double **host_viscosity, - double *special_lj, const int inum, const int nall, +int sph_lj_gpu_init(const int ntypes, double **cutsq, double** host_cut, + double **host_viscosity, double *special_lj, + const int inum, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen) { SPHLJMF.clear(); @@ -53,8 +54,8 @@ int sph_lj_gpu_init(const int ntypes, double **cutsq, double **host_viscosity, int init_ok=0; if (world_me==0) - init_ok=SPHLJMF.init(ntypes, cutsq, host_viscosity, special_lj, - inum, nall, max_nbors, maxspecial, + init_ok=SPHLJMF.init(ntypes, cutsq, host_cut, host_viscosity, + special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); SPHLJMF.device->world_barrier(); @@ -71,9 +72,9 @@ int sph_lj_gpu_init(const int ntypes, double **cutsq, double **host_viscosity, fflush(screen); } if (gpu_rank==i && world_me!=0) - init_ok=SPHLJMF.init(ntypes, cutsq, host_viscosity, special_lj, - inum, nall, max_nbors, maxspecial, - cell_size, gpu_split, screen); + init_ok=SPHLJMF.init(ntypes, cutsq, host_cut, host_viscosity, + special_lj, inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); SPHLJMF.device->serialize_init(); if (message) @@ -119,8 +120,9 @@ void sph_lj_gpu_compute(const int ago, const int inum_full, const int nall, tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd); } -void sph_lj_gpu_get_extra_data(double *host_rho, double *host_cv, double *host_mass) { - SPHLJMF.get_extra_data(host_rho, host_cv, host_mass); +void sph_lj_gpu_get_extra_data(double *host_rho, double *host_esph, + double *host_cv, double *host_mass) { + SPHLJMF.get_extra_data(host_rho, host_esph, host_cv, host_mass); } void sph_lj_gpu_update_drhoE(void **drhoE_ptr) {