From 4960aeb3c825b831e4bad060a848cb0e7c8d4dfc Mon Sep 17 00:00:00 2001 From: Gurgen Date: Sat, 6 Mar 2021 23:39:37 +0300 Subject: [PATCH] error on line 194 --- lib/gpu/lal_lj_smooth.cu | 233 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 lib/gpu/lal_lj_smooth.cu diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu new file mode 100644 index 0000000000..b69f8b9388 --- /dev/null +++ b/lib/gpu/lal_lj_smooth.cu @@ -0,0 +1,233 @@ +// ************************************************************************** +// lj_smooth.cu +// ------------------- +// W. Michael Brown (ORNL) +// +// Device code for acceleration of the lj/smooth pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : brownw@ornl.gov +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +#else +_texture_2d( pos_tex,int4); +#endif +#else +#define pos_tex x_ +#endif + +__kernel void k_lj_smooth(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1, + const __global numtyp4 *restrict lj3, + const __global numtyp4 *restrict ljsw, + const int lj_types, + const __global numtyp *restrict sp_lj, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + if (ii0) { + numtyp e; + if (rsq < lj1[mtype].w) + e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; + else + e = ljsw[mtype].x - ljsw[mtype].x*t - + ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - + ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; + + //numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); + energy+=factor_lj*e; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} + +__kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1_in, + const __global numtyp4 *restrict lj3_in, + const __global numtyp4 *restrict ljsw, + const __global numtyp *restrict sp_lj_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + if (tid<4) + sp_lj[tid]=sp_lj_in[tid]; + if (tid0) + lj3[tid]=lj3_in[tid]; + } + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii0) { + numtyp e; + if (rsq < lj1[mtype].w) + e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; + else + e = ljsw[mtype].x - ljsw[mtype].x*t - + ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - + ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; //??? + + //numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); + energy+=factor_lj*e; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} +