// ************************************************************************** // eam.cu // ------------------- // Trung Dac Nguyen, W. Michael Brown (ORNL) // // Device code for acceleration of the eam pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : brownw@ornl.gov nguyentd@ornl.gov // ***************************************************************************/ #ifdef NV_KERNEL #include "lal_aux_fun1.h" texture pos_tex; texture fp_tex; texture rhor_sp1_tex; texture rhor_sp2_tex; texture frho_sp1_tex; texture frho_sp2_tex; texture z2r_sp1_tex; texture z2r_sp2_tex; #ifdef _DOUBLE_DOUBLE ucl_inline double4 fetch_rhor_sp1(const int& i, const double4 *rhor_spline1) { return rhor_spline1[i]; } ucl_inline double4 fetch_rhor_sp2(const int& i, const double4 *rhor_spline2) { return rhor_spline2[i]; } ucl_inline double4 fetch_frho_sp1(const int& i, const double4 *frho_spline1) { return frho_spline1[i]; } ucl_inline double4 fetch_frho_sp2(const int& i, const double4 *frho_spline2) { return frho_spline2[i]; } ucl_inline double4 fetch_z2r_sp1(const int& i, const double4 *z2r_spline1) { return z2r_spline1[i]; } ucl_inline double4 fetch_z2r_sp2(const int& i, const double4 *z2r_spline2) { return z2r_spline2[i]; } #endif #ifndef _DOUBLE_DOUBLE ucl_inline float4 fetch_pos(const int& i, const float4 *pos) { return tex1Dfetch(pos_tex, i); } ucl_inline float fetch_q(const int& i, const float *fp) { return tex1Dfetch(fp_tex, i); } ucl_inline float4 fetch_rhor_sp1(const int& i, const float4 *rhor_spline1) { return tex1Dfetch(rhor_sp1_tex, i); } ucl_inline float4 fetch_rhor_sp2(const int& i, const float4 *rhor_spline2) { return tex1Dfetch(rhor_sp2_tex, i); } ucl_inline float4 fetch_frho_sp1(const int& i, const float4 *frho_spline1) { return tex1Dfetch(frho_sp1_tex, i); } ucl_inline float4 fetch_frho_sp2(const int& i, const float4 *frho_spline2) { return tex1Dfetch(frho_sp2_tex, i); } ucl_inline float4 fetch_z2r_sp1(const int& i, const float4 *z2r_spline1) { return tex1Dfetch(z2r_sp1_tex, i); } ucl_inline float4 fetch_z2r_sp2(const int& i, const float4 *z2r_spline2) { return tex1Dfetch(z2r_sp2_tex, i); } #endif #else // OPENCL #define fetch_q(i,y) fp_[i] #define fetch_rhor_sp1(i,y) rhor_spline1[i] #define fetch_rhor_sp2(i,y) rhor_spline2[i] #define fetch_frho_sp1(i,y) frho_spline1[i] #define fetch_frho_sp2(i,y) frho_spline2[i] #define fetch_z2r_sp1(i,y) z2r_spline1[i] #define fetch_z2r_sp2(i,y) z2r_spline2[i] #endif #define MIN(A,B) ((A) < (B) ? (A) : (B)) #define MAX(A,B) ((A) > (B) ? (A) : (B)) #define store_energy_fp(rho,energy,ii,inum,tid,t_per_atom,offset, \ eflag,vflag,engv,rdrho,nrho,i) \ if (t_per_atom>1) { \ __local acctyp red_acc[BLOCK_PAIR]; \ red_acc[tid]=rho; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ if (offset < s) \ red_acc[tid] += red_acc[tid+s]; \ } \ rho=red_acc[tid]; \ } \ if (offset==0) { \ numtyp p = rho*rdrho + (numtyp)1.0; \ int m=p; \ m = MAX(1,MIN(m,nrho-1)); \ p -= m; \ p = MIN(p,(numtyp)1.0); \ int index = type2frho[itype]*(nrho+1)+m; \ numtyp4 coeff = fetch_frho_sp1(index, frho_spline1); \ numtyp fp = (coeff.x*p + coeff.y)*p + coeff.z; \ fp_[i]=fp; \ if (eflag>0) { \ coeff = fetch_frho_sp2(index, frho_spline2); \ energy = ((coeff.x*p + coeff.y)*p + coeff.z)*p + coeff.w; \ engv[ii]=(acctyp)2.0*energy; \ } \ } #define store_answers_eam(f, energy, virial, ii, inum, tid, t_per_atom, \ offset, elag, vflag, ans, engv) \ if (t_per_atom>1) { \ __local acctyp red_acc[6][BLOCK_PAIR]; \ red_acc[0][tid]=f.x; \ red_acc[1][tid]=f.y; \ red_acc[2][tid]=f.z; \ red_acc[3][tid]=energy; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ if (offset < s) { \ for (int r=0; r<4; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ f.x=red_acc[0][tid]; \ f.y=red_acc[1][tid]; \ f.z=red_acc[2][tid]; \ energy=red_acc[3][tid]; \ if (vflag>0) { \ for (int r=0; r<6; r++) \ red_acc[r][tid]=virial[r]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ if (offset < s) { \ for (int r=0; r<6; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ for (int r=0; r<6; r++) \ virial[r]=red_acc[r][tid]; \ } \ } \ if (offset==0) { \ if (eflag>0) { \ engv[ii]+=energy; \ engv+=inum; \ } \ if (vflag>0) { \ for (int i=0; i<6; i++) { \ engv[ii]=virial[i]; \ engv+=inum; \ } \ } \ ans[ii]=f; \ } __kernel void kernel_energy(__global numtyp4 *x_, __global int2 *type2rhor_z2r, __global int *type2frho, __global numtyp4 *rhor_spline2, __global numtyp4 *frho_spline1, __global numtyp4 *frho_spline2, __global int *dev_nbor, __global int *dev_packed, __global numtyp *fp_, __global acctyp *engv, const int eflag, const int inum, const int nbor_pitch, const int ntypes, const numtyp cutforcesq, const numtyp rdr, const numtyp rdrho, const int nrho, const int nr, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); acctyp rho = (acctyp)0; acctyp energy = (acctyp)0; if (ii0) { energy += phi; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers_eam(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp *fp_, __global int2 *type2rhor_z2r_in, __global numtyp4 *rhor_spline1, __global numtyp4 *z2r_spline1, __global numtyp4 *z2r_spline2, __global int *dev_nbor, __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const numtyp cutforcesq, const numtyp rdr, const int nr, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local int2 type2rhor_z2r[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; if (tid0) { energy += phi; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers_eam(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii }