// ************************************************************************** // sph_lj.cu // ------------------- // Trung Dac Nguyen (U Chicago) // // Device code for acceleration of the sph/lj pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : September 2023 // email : ndactrung@gmail.com // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); _texture( vel_tex,float4); #else _texture_2d( pos_tex,int4); _texture_2d( vel_tex,int4); #endif #else #define pos_tex x_ #define vel_tex v_ #endif #if (SHUFFLE_AVAIL == 0) #define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, i, drhoE) \ if (t_per_atom>1) { \ simdsync(); \ simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ drhoEacc.x, drhoEacc.y); \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \ drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom); \ } \ } \ if (offset==0 && ii (numtyp)0.0) { pc[1] = ucl_sqrt(csq); // soundspeed } else { pc[1] = (numtyp)0.0; } } __kernel void k_sph_lj(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict extra, const __global numtyp4 *restrict coeff, const __global numtyp *restrict mass, const int lj_types, const __global numtyp *restrict sp_lj, const __global int * dev_nbor, const __global int * dev_packed, __global acctyp3 *restrict ans, __global acctyp *restrict engv, __global acctyp *restrict drhoE, const int eflag, const int vflag, const int inum, const int nbor_pitch, const __global numtyp4 *restrict v_, const int dimension, const int t_per_atom) { int tid, ii, offset, i; atom_info(t_per_atom,ii,tid,offset); int n_stride; local_allocate_store_pair(); acctyp3 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp energy, virial[6]; if (EVFLAG) { energy=(acctyp)0; for (int i=0; i<6; i++) virial[i]=(acctyp)0; } acctyp2 drhoEacc; drhoEacc.x = drhoEacc.y = (acctyp)0; if (ii