// **************************************************************************
//                                   soft.cu
//                             -------------------
//                           Trung Dac Nguyen (ORNL)
//
//  Device code for acceleration of the soft pair style
//
// __________________________________________________________________________
//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
//    begin                :
//    email                : nguyentd@ornl.gov
// ***************************************************************************

// NOTE(review): this file was recovered from a copy in which all line breaks
// had been collapsed and every span between a '<' and the following '>' had
// been stripped. The regions bracketed by "NOTE(review): reconstructed"
// markers below were rebuilt from the surviving tokens and must be compared
// against the pristine upstream file before merging.

#if defined(NV_KERNEL) || defined(USE_HIP)

#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
_texture( pos_tex,float4);
#else
_texture_2d( pos_tex,int4);
#endif

#else

// Without textures, position fetches read straight from the x_ argument.
#define pos_tex x_

// hack for Intel GPU with double precision
#if defined(_DOUBLE_DOUBLE) && (CONFIG_ID == 500)
#define MY_PI_HALF (acctyp)1.57079632679489661923
// cos(x) expressed through sin() with a quarter-period phase shift.
#define my_cos(x) sin(x+MY_PI_HALF)
#endif

#endif

// Default: use the native cosine unless a workaround was selected above.
#if !defined(my_cos)
#define my_cos(x) cos(x)
#endif

#define MY_PI (acctyp)3.14159265358979323846

// ---------------------------------------------------------------------------
// k_soft: pairwise force / energy / virial accumulation for the soft pair
// style, reading per-type-pair coefficients directly from the coeff argument.
//
// For every accepted pair the kernel evaluates
//   arg   = MY_PI * r / coeff[mtype].y
//   force = factor_lj * coeff[mtype].x * sin(arg) * MY_PI / coeff[mtype].y / r
//           (zero when r is not positive)
//   e     = coeff[mtype].x * (1 + cos(arg)), accumulated as factor_lj * e
//
//   x_          positions; also the fetch source when pos_tex maps to x_
//   coeff       per-type-pair coefficients (.x prefactor, .y divisor of
//               MY_PI*r); indexed by mtype
//   lj_types    row stride used to form mtype
//               (NOTE(review): usage lies in a reconstructed region)
//   sp_lj_in    four scaling factors copied into local sp_lj
//   dev_nbor,
//   dev_packed  neighbor list storage
//   ans, engv   outputs, written by store_answers()
//   eflag,vflag enable energy / virial tallies (together with EVFLAG)
//   inum        number of atoms handled by this launch (bound for ii)
//   nbor_pitch  pitch of the neighbor storage, forwarded to nbor_info()
//   t_per_atom  threads cooperating on one atom
// ---------------------------------------------------------------------------
__kernel void k_soft(const __global numtyp4 *restrict x_,
                     const __global numtyp4 *restrict coeff,
                     const int lj_types,
                     const __global numtyp *restrict sp_lj_in,
                     const __global int *dev_nbor,
                     const __global int *dev_packed,
                     __global acctyp4 *restrict ans,
                     __global acctyp *restrict engv,
                     const int eflag, const int vflag, const int inum,
                     const int nbor_pitch, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  __local numtyp sp_lj[4];
  int n_stride;
  local_allocate_store_pair();

  // Every thread writes all four entries (identical values), so each thread
  // only ever depends on data it has stored itself.
  sp_lj[0]=sp_lj_in[0];
  sp_lj[1]=sp_lj_in[1];
  sp_lj[2]=sp_lj_in[2];
  sp_lj[3]=sp_lj_in[3];

  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp energy, virial[6];
  if (EVFLAG) {
    energy=(acctyp)0;
    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
  }

  // NOTE(review): reconstructed -- begin (stripped span "ii<inum ... r >")
  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    int itype=ix.w;

    numtyp factor_lj;
    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int j=dev_packed[nbor];
      factor_lj = sp_lj[sbmask(j)];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      int mtype=itype*lj_types+jtype;
      if (rsq<coeff[mtype].z) {
        numtyp force;
        numtyp r = ucl_sqrt(rsq);
        numtyp arg = MY_PI*r/coeff[mtype].y;
        // NOTE(review): reconstructed -- end
        // Guard the 1/r factor: coincident atoms contribute no force.
        if (r > (numtyp)0.0)
          force = factor_lj * coeff[mtype].x *
                  sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
        else force = (numtyp)0.0;

        f.x+=delx*force;
        f.y+=dely*force;
        f.z+=delz*force;

        if (EVFLAG && eflag) {
          numtyp e=coeff[mtype].x * ((numtyp)1.0+my_cos(arg));
          energy+=factor_lj*e;
        }
        if (EVFLAG && vflag) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
  } // if ii
  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
                ans,engv);
}

// ---------------------------------------------------------------------------
// k_soft_fast: same pair evaluation as k_soft, but the coefficient table is
// first staged into a __local array of MAX_SHARED_TYPES*MAX_SHARED_TYPES
// entries and read from there.
//
//   x_          positions; also the fetch source when pos_tex maps to x_
//   coeff_in    global coefficient table staged into the local coeff array
//               (NOTE(review): the copy itself lies in a reconstructed region)
//   sp_lj_in    four scaling factors; threads with tid<4 copy one entry each
//   dev_nbor,
//   dev_packed  neighbor list storage
//   ans, engv   outputs, written by store_answers()
//   eflag,vflag enable energy / virial tallies (together with EVFLAG)
//   inum        number of atoms handled by this launch (bound for ii)
//   nbor_pitch  pitch of the neighbor storage, forwarded to nbor_info()
//   t_per_atom  threads cooperating on one atom
// ---------------------------------------------------------------------------
__kernel void k_soft_fast(const __global numtyp4 *restrict x_,
                          const __global numtyp4 *restrict coeff_in,
                          const __global numtyp *restrict sp_lj_in,
                          const __global int *dev_nbor,
                          const __global int *dev_packed,
                          __global acctyp4 *restrict ans,
                          __global acctyp *restrict engv,
                          const int eflag, const int vflag, const int inum,
                          const int nbor_pitch, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[4];
  int n_stride;
  local_allocate_store_pair();

  if (tid<4)
    sp_lj[tid]=sp_lj_in[tid];
  // NOTE(review): reconstructed -- begin (stripped span "tid<MAX_... r >")
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    coeff[tid]=coeff_in[tid];
  }

  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp energy, virial[6];
  if (EVFLAG) {
    energy=(acctyp)0;
    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
  }

  // The local tables are filled cooperatively, so all threads must reach
  // this barrier before any of them reads sp_lj or coeff.
  __syncthreads();

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    int iw=ix.w;
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    numtyp factor_lj;
    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int j=dev_packed[nbor];
      factor_lj = sp_lj[sbmask(j)];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<coeff[mtype].z) {
        numtyp force;
        numtyp r = ucl_sqrt(rsq);
        numtyp arg = MY_PI*r/coeff[mtype].y;
        // NOTE(review): reconstructed -- end
        // Guard the 1/r factor: coincident atoms contribute no force.
        if (r > (numtyp)0.0)
          force = factor_lj * coeff[mtype].x *
                  sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
        else force = (numtyp)0.0;

        f.x+=delx*force;
        f.y+=dely*force;
        f.z+=delz*force;

        if (EVFLAG && eflag) {
          numtyp e=coeff[mtype].x * ((numtyp)1.0+my_cos(arg));
          energy+=factor_lj*e;
        }
        if (EVFLAG && vflag) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
  } // if ii
  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
                ans,engv);
}