// NOTE(review): the two long physical lines below look like a copy of sw.cu
// whose line breaks were collapsed to spaces. Because the first of them opens
// with `//`, a compiler reads that entire line as a comment, so none of its
// directives or macro text take effect as written. Several spots also look
// truncated (`ii1)`, `ii 1; active_subgs /= vwidth)`, `ii(numtyp)0 && rsq2`),
// presumably where text between `<` and `>` was dropped, and the final
// fragment stops mid-expression -- TODO restore this section from a pristine
// copy before building; do not hand-repair it from this text.
//
// What the visible text contains, in order:
//  * File header banner (author, purpose: device code for the sw pair style).
//  * With NV_KERNEL or USE_HIP defined: includes "lal_aux_fun1.h" and declares
//    pos_tex, sw1_tex, sw2_tex and sw3_tex through _texture/_texture_2d, with
//    float4 elements, or int4 when _DOUBLE_DOUBLE is defined. Otherwise the
//    four names are #defined to x_, sw1, sw2 and sw3.
//  * THIRD = (numtyp)0.66666666666666666667.
//    NOTE(review): the literal is 2/3 although the name reads "third" --
//    confirm against its uses (none are visible here) before touching it.
//  * THREE_CONCURRENT is left commented out.
//  * store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset,
//    eflag, vflag, ans, engv, ev_stride), SHUFFLE_AVAIL == 0 variant: when
//    t_per_atom > 1 it sums f.x/f.y/f.z with simd_reduce_add3 through red_acc;
//    if EVFLAG && (vflag==2 || eflag==2) it also reduces energy
//    (simd_reduce_add1) and the 6 virial entries (simd_reduce_arr), each
//    preceded by simdsync(). The rest of this variant is missing.
//  * A later variant calls the reduce helpers without red_acc/offset/tid, then
//    loops while dividing active_subgs by vwidth: sub-groups with
//    bnum < active_subgs reduce over vwidth and the voffset==0 lane stores
//    energy to red_acc[6][bnum] and virial[r] to red_acc[r][bnum]; after
//    __syncthreads(), threads with tid < active_subgs reload those partials
//    and all other threads zero theirs. Finally the bnum == 0 sub-group
//    reduces once more and tid == 0 adds 0.5*energy and 0.5*virial[r] into
//    engv, starting at index BLOCK_ID_X and stepping by ev_stride.
//  * A last, truncated variant only shows the simd_reduce_add3 force sum.
//
// red_acc, EVFLAG, bnum, vwidth, voffset and acctyp are not macro parameters;
// they are either declared in text missing from this copy or expected to be
// in scope where the macro is expanded -- verify against the callers.
// ************************************************************************** // sw.cu // ------------------- // W. Michael Brown (ORNL) // // Device code for acceleration of the sw pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : Tue March 26, 2013 // email : brownw@ornl.gov // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); _texture( sw1_tex,float4); _texture( sw2_tex,float4); _texture( sw3_tex,float4); #else _texture_2d( pos_tex,int4); _texture( sw1_tex,int4); _texture( sw2_tex,int4); _texture( sw3_tex,int4); #endif #else #define pos_tex x_ #define sw1_tex sw1 #define sw2_tex sw2 #define sw3_tex sw3 #endif #define THIRD (numtyp)0.66666666666666666667 //#define THREE_CONCURRENT #if (SHUFFLE_AVAIL == 0) #define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \ offset, eflag, vflag, ans, engv, ev_stride) \ if (t_per_atom>1) { \ simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \ if (EVFLAG && (vflag==2 || eflag==2)) { \ if (eflag) { \ simdsync(); \ simd_reduce_add1(t_per_atom, red_acc, offset, tid, energy); \ } \ if (vflag) { \ simdsync(); \ simd_reduce_arr(6, t_per_atom, red_acc, offset, tid, virial); \ } \ } \ } \ if (offset==0 && ii1) { \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (vflag==2 || eflag==2) { \ if (eflag) \ simd_reduce_add1(t_per_atom,energy); \ if (vflag) \ simd_reduce_arr(6, t_per_atom,virial); \ } \ } \ if (offset==0 && ii 1; active_subgs /= vwidth) { \ if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \ if (bnum < active_subgs) { \ if (eflag) { \ simd_reduce_add1(vwidth, energy); \ if (voffset==0) red_acc[6][bnum] = energy; \ } \ if (vflag) { \ 
simd_reduce_arr(6, vwidth, virial); \ if (voffset==0) \ for (int r=0; r<6; r++) red_acc[r][bnum]=virial[r]; \ } \ } \ \ __syncthreads(); \ if (tid < active_subgs) { \ if (eflag) energy = red_acc[6][tid]; \ if (vflag) \ for (int r = 0; r < 6; r++) virial[r] = red_acc[r][tid]; \ } else { \ if (eflag) energy = (acctyp)0; \ if (vflag) for (int r = 0; r < 6; r++) virial[r] = (acctyp)0; \ } \ } \ \ if (bnum == 0) { \ int ei=BLOCK_ID_X; \ if (eflag) { \ simd_reduce_add1(vwidth, energy); \ if (tid==0) { \ engv[ei]+=energy*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (tid==0) { \ for (int r=0; r<6; r++) { \ engv[ei]+=virial[r]*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ } \ } \ } else if (offset==0 && ii1) \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (offset==0 && ii(numtyp)0 && rsq2(numtyp)0 && rsq2