// ************************************************************************** // coul_slater_long.cu // ------------------- // Trung Nguyen (U Chicago) // // Device code for acceleration of the coul/slater/long pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : September 2023 // email : ndactrung@gmail.com // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); _texture( q_tex,float); #else _texture_2d( pos_tex,int4); _texture( q_tex,int2); #endif #else #define pos_tex x_ #define q_tex q_ #endif __kernel void k_coul_slater_long(const __global numtyp4 *restrict x_, const __global numtyp *restrict scale, const int lj_types, const __global numtyp *restrict sp_cl_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp3 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const __global numtyp *restrict q_, const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const numtyp lamda, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_cl[4]; int n_stride; local_allocate_store_charge(); sp_cl[0]=sp_cl_in[0]; sp_cl[1]=sp_cl_in[1]; sp_cl[2]=sp_cl_in[2]; sp_cl[3]=sp_cl_in[3]; acctyp3 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp e_coul, virial[6]; if (EVFLAG) { e_coul=(acctyp)0; for (int i=0; i<6; i++) virial[i]=(acctyp)0; } if (ii (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term); force *= r2inv; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (EVFLAG && eflag) { numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; numtyp e = prefactor*(_erfc-e_slater); if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); e_coul += e; } if (EVFLAG && vflag) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor } // if ii acctyp energy; if (EVFLAG) energy=(acctyp)0.0; store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag, vflag,ans,engv); } __kernel void k_coul_slater_long_fast(const __global numtyp4 *restrict x_, const __global numtyp *restrict scale_in, const __global numtyp *restrict sp_cl_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp3 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const __global numtyp *restrict q_, const numtyp cut_coulsq, const numtyp qqrd2e, const numtyp g_ewald, const numtyp lamda, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_cl[4]; int n_stride; local_allocate_store_charge(); if (tid<4) sp_cl[tid]=sp_cl_in[tid]; if (tid (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term); force *= r2inv; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (EVFLAG && eflag) { numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; numtyp e = prefactor*(_erfc-e_slater); if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); e_coul += e; } if (EVFLAG && vflag) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor } // if ii acctyp energy; if (EVFLAG) energy=(acctyp)0.0; store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag, vflag,ans,engv); }