// NOTE(review): this text appears to be damaged and should be restored from
// a pristine copy before it is compiled.
//   * The banner, the #if/#include lines and every #define below share one
//     physical line that starts with `//`, so that whole line is a comment.
//   * Each `\` is followed by a space instead of a newline, so none of them
//     acts as a macro line continuation.
//   * `if (offset==0 && ii1) {`, `ii 1; active_subgs /= vwidth)` and
//     `ii1) \` are not valid expressions. Presumably everything from a `<`
//     directly followed by a letter up to the next `>` was stripped (as an
//     HTML tag filter would do) -- TODO confirm against the original.
//   * The text stops in the middle of `if (offset==0 && ii`.
//
// What can still be read from the two lines below:
//   pos_tex
//     With NV_KERNEL or USE_HIP defined, "lal_tersoff_extra.h" is included
//     and pos_tex is declared through _texture(pos_tex,float4), or through
//     _texture_2d(pos_tex,int4) when _DOUBLE_DOUBLE is defined. Otherwise
//     pos_tex is #defined to x_.
//   TWOTHIRD
//     (numtyp)0.66666666666666666667.
//   local_allocate_acc_zeta()            [SHUFFLE_AVAIL == 0]
//     Declares `__local acctyp red_acc[BLOCK_PAIR];`.
//   acc_zeta(z, tid, t_per_atom, offset) [SHUFFLE_AVAIL == 0]
//     Only when t_per_atom>1: writes z to red_acc[tid]; then for
//     s = t_per_atom/2, halved each pass down to 1, calls simdsync() and, if
//     offset < s, adds red_acc[tid+s] into red_acc[tid]; finally reads z
//     back from red_acc[tid].
//   `z += shfl_down(z, s, t_per_atom)` loop
//     Same halving loop without red_acc. Presumably the SHUFFLE_AVAIL != 0
//     acc_zeta; its #else/#define lines are among the missing text.
//   store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset,
//                   eflag, vflag, ans, engv, ev_stride)
//     Readable head, both variants: when t_per_atom>1, reduces f.x/f.y/f.z
//     with simd_reduce_add3; when vflag==2 or eflag==2 (the first variant
//     also requires EVFLAG) it reduces energy with simd_reduce_add1 if eflag
//     and the 6 virial entries with simd_reduce_arr if vflag. The first
//     variant passes red_acc/offset/tid to these helpers and calls
//     simdsync() before each energy/virial reduction.
//     Readable tail, EVFLAG == 1 variant: inside a loop over active_subgs,
//     threads with voffset==0 store energy in red_acc[6][bnum] and virial[r]
//     in red_acc[r][bnum]. After __syncthreads(), threads with
//     tid < active_subgs reload those values and all others zero theirs.
//     Then, for bnum == 0, the values are reduced once more and tid==0 adds
//     energy*0.5 and each virial[r]*0.5 into engv, starting at index
//     BLOCK_ID_X and advancing by ev_stride per entry.
//     The definitions of vwidth, voffset, bnum and active_subgs, and the
//     writes to `ans`, are not visible here.
// ************************************************************************** // tersoff.cu // ------------------- // Trung Dac Nguyen // // Device code for acceleration of the tersoff pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : Thu April 17, 2014 // email : ndactrung@gmail.com // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_tersoff_extra.h" #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); #else _texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ #endif //#define THREE_CONCURRENT #define TWOTHIRD (numtyp)0.66666666666666666667 #if (SHUFFLE_AVAIL == 0) #define local_allocate_acc_zeta() \ __local acctyp red_acc[BLOCK_PAIR]; #define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ red_acc[tid]=z; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ red_acc[tid] += red_acc[tid+s]; \ } \ } \ z=red_acc[tid]; \ } #define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \ offset, eflag, vflag, ans, engv, ev_stride) \ if (t_per_atom>1) { \ simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \ if (EVFLAG && (vflag==2 || eflag==2)) { \ if (eflag) { \ simdsync(); \ simd_reduce_add1(t_per_atom, red_acc, offset, tid, energy); \ } \ if (vflag) { \ simdsync(); \ simd_reduce_arr(6, t_per_atom, red_acc, offset, tid, virial); \ } \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ z += shfl_down(z, s, t_per_atom); \ } \ } #if (EVFLAG == 1) #define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \ offset, eflag, vflag, ans, engv, ev_stride) \ if (t_per_atom>1) { \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (vflag==2 || eflag==2) { \ if (eflag) \ 
simd_reduce_add1(t_per_atom,energy); \ if (vflag) \ simd_reduce_arr(6, t_per_atom,virial); \ } \ } \ if (offset==0 && ii 1; active_subgs /= vwidth) { \ if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \ if (bnum < active_subgs) { \ if (eflag) { \ simd_reduce_add1(vwidth, energy); \ if (voffset==0) red_acc[6][bnum] = energy; \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (voffset==0) \ for (int r=0; r<6; r++) red_acc[r][bnum]=virial[r]; \ } \ } \ \ __syncthreads(); \ if (tid < active_subgs) { \ if (eflag) energy = red_acc[6][tid]; \ if (vflag) \ for (int r = 0; r < 6; r++) virial[r] = red_acc[r][tid]; \ } else { \ if (eflag) energy = (acctyp)0; \ if (vflag) for (int r = 0; r < 6; r++) virial[r] = (acctyp)0; \ } \ } \ \ if (bnum == 0) { \ int ei=BLOCK_ID_X; \ if (eflag) { \ simd_reduce_add1(vwidth, energy); \ if (tid==0) { \ engv[ei]+=energy*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (tid==0) { \ for (int r=0; r<6; r++) { \ engv[ei]+=virial[r]*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ } \ } \ } else if (offset==0 && ii1) \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (offset==0 && ii