// ************************************************************************** // hippo.cu // ------------------- // Trung Dac Nguyen (Northwestern) // // Device code for acceleration of the hippo pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : trung.nguyen@northwestern.edu // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_hippo_extra.h" #ifdef LAMMPS_SMALLBIG #define tagint int #endif #ifdef LAMMPS_BIGBIG #include "inttypes.h" #define tagint int64_t #endif #ifdef LAMMPS_SMALLSMALL #define tagint int #endif #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); _texture( q_tex,float); #else _texture_2d( pos_tex,int4); _texture( q_tex,int2); #endif #else #define pos_tex x_ #define q_tex q_ #ifdef LAMMPS_SMALLBIG #define tagint int #endif #ifdef LAMMPS_BIGBIG #define tagint long #endif #ifdef LAMMPS_SMALLSMALL #define tagint int #endif #endif // defined(NV_KERNEL) || defined(USE_HIP) #if (SHUFFLE_AVAIL == 0) #define local_allocate_store_ufld() \ __local acctyp red_acc[6][BLOCK_PAIR]; #define store_answers_hippo_tq(tq, ii, inum,tid, t_per_atom, offset, i, \ tep) \ if (t_per_atom>1) { \ red_acc[0][tid]=tq.x; \ red_acc[1][tid]=tq.y; \ red_acc[2][tid]=tq.z; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<3; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ tq.x=red_acc[0][tid]; \ tq.y=red_acc[1][tid]; \ tq.z=red_acc[2][tid]; \ } \ if (offset==0 && ii1) { \ red_acc[0][tid]=ufld[0]; \ red_acc[1][tid]=ufld[1]; \ red_acc[2][tid]=ufld[2]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<3; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ ufld[0]=red_acc[0][tid]; \ ufld[1]=red_acc[1][tid]; \ ufld[2]=red_acc[2][tid]; \ red_acc[0][tid]=dufld[0]; \ red_acc[1][tid]=dufld[1]; \ red_acc[2][tid]=dufld[2]; \ red_acc[3][tid]=dufld[3]; \ red_acc[4][tid]=dufld[4]; \ red_acc[5][tid]=dufld[5]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<6; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ dufld[0]=red_acc[0][tid]; \ dufld[1]=red_acc[1][tid]; \ dufld[2]=red_acc[2][tid]; \ dufld[3]=red_acc[3][tid]; \ dufld[4]=red_acc[4][tid]; \ dufld[5]=red_acc[5][tid]; \ } \ if (offset==0 && ii1) { \ red_acc[0][tid]=_fieldp[0]; \ red_acc[1][tid]=_fieldp[1]; \ red_acc[2][tid]=_fieldp[2]; \ red_acc[3][tid]=_fieldp[3]; \ red_acc[4][tid]=_fieldp[4]; \ red_acc[5][tid]=_fieldp[5]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<6; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ _fieldp[0]=red_acc[0][tid]; \ _fieldp[1]=red_acc[1][tid]; \ _fieldp[2]=red_acc[2][tid]; \ _fieldp[3]=red_acc[3][tid]; \ _fieldp[4]=red_acc[4][tid]; \ _fieldp[5]=red_acc[5][tid]; \ } \ if (offset==0 && ii1) { \ simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \ if (EVFLAG && (vflag==2 || eflag==2)) { \ if (eflag) { \ simdsync(); \ simd_reduce_add2(t_per_atom, red_acc, offset, tid, energy, e_coul); \ } \ if (vflag) { \ simdsync(); \ simd_reduce_arr(6, t_per_atom, red_acc, offset, tid, virial); \ } \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ tq.x += shfl_down(tq.x, s, t_per_atom); \ tq.y += shfl_down(tq.y, s, t_per_atom); \ tq.z += shfl_down(tq.z, s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ ufld[0] += shfl_down(ufld[0], s, t_per_atom); \ ufld[1] += shfl_down(ufld[1], s, t_per_atom); \ ufld[2] += shfl_down(ufld[2], s, t_per_atom); \ dufld[0] += shfl_down(dufld[0], s, t_per_atom); \ dufld[1] += shfl_down(dufld[1], s, t_per_atom); \ dufld[2] += shfl_down(dufld[2], s, t_per_atom); \ dufld[3] += shfl_down(dufld[3], s, t_per_atom); \ dufld[4] += shfl_down(dufld[4], s, t_per_atom); \ dufld[5] += shfl_down(dufld[5], s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ _fieldp[0] += shfl_down(_fieldp[0], s, t_per_atom); \ _fieldp[1] += shfl_down(_fieldp[1], s, t_per_atom); \ _fieldp[2] += shfl_down(_fieldp[2], s, t_per_atom); \ _fieldp[3] += shfl_down(_fieldp[3], s, t_per_atom); \ _fieldp[4] += shfl_down(_fieldp[4], s, t_per_atom); \ _fieldp[5] += shfl_down(_fieldp[5], s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (vflag==2 || eflag==2) { \ if (eflag) \ simd_reduce_add2(t_per_atom,energy,e_coul); \ if (vflag) \ simd_reduce_arr(6, t_per_atom,virial); \ } \ } \ if (offset==0 && ii 1; active_subgs /= vwidth) { \ if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \ if (bnum < active_subgs) { \ if (eflag) { \ simd_reduce_add2(vwidth, energy, e_coul); \ if (voffset==0) { \ red_acc[6][bnum] = energy; \ red_acc[7][bnum] = e_coul; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (voffset==0) \ for (int r=0; r<6; r++) red_acc[r][bnum]=virial[r]; \ } \ } \ \ __syncthreads(); \ if (tid < active_subgs) { \ if (eflag) { \ energy = red_acc[6][tid]; \ e_coul = red_acc[7][tid]; \ } \ if (vflag) \ for (int r = 0; r < 6; r++) virial[r] = red_acc[r][tid]; \ } else { \ if (eflag) energy = e_coul = (acctyp)0; \ if (vflag) for (int r = 0; r < 6; r++) virial[r] = (acctyp)0; \ } \ } \ \ if (bnum == 0) { \ int ei=BLOCK_ID_X; \ if (eflag) { \ simd_reduce_add2(vwidth, energy, e_coul); \ if (tid==0) { \ engv[ei]+=energy*(acctyp)0.5; \ ei+=ev_stride; \ engv[ei]+=e_coul*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (tid==0) { \ for (int r=0; r<6; r++) { \ engv[ei]+=virial[r]*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ } \ } \ } else if (offset==0 && ii1) \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (offset==0 && ii cut2) { numtyp r3 = r2 * r; numtyp r4 = r2 * r2; numtyp r5 = r2 * r3; numtyp taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0; numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp)4.0*c4*r3 + (numtyp)3.0*c3*r2 + (numtyp)2.0*c2*r + c1; dtaper *= e * rr1; e *= taper; frcx = frcx*taper - dtaper*xr; frcy = frcy*taper - dtaper*yr; frcz = frcz*taper - dtaper*zr; ttmix *= taper; ttmiy *= taper; ttmiz *= taper; } energy += e; // increment force-based gradient and torque on atom I f.x -= frcx; f.y -= frcy; f.z -= frcz; tq.x += ttmix; tq.y += ttmiy; tq.z += ttmiz; // increment the internal virial tensor components if (EVFLAG && vflag) { numtyp vxx = -xr * frcx; numtyp vxy = (numtyp)-0.5 * (yr*frcx+xr*frcy); numtyp vxz = (numtyp)-0.5 * (zr*frcx+xr*frcz); numtyp vyy = -yr * frcy; numtyp vyz = (numtyp)-0.5 * (zr*frcy+yr*frcz); numtyp vzz = -zr * frcz; virial[0] -= vxx; virial[1] -= vyy; virial[2] -= vzz; virial[3] -= vxy; virial[4] -= vxz; virial[5] -= vyz; } } // nbor } // ii (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald); int m; for (m = 1; m < 6; m++) { numtyp bfac = (numtyp) (m+m-1); alsq2n = alsq2 * alsq2n; bn[m] = (bfac*bn[m-1]+alsq2n*exp2a) * r2inv; } for (m = 0; m < 6; m++) bn[m] *= felec; numtyp term1,term2,term3; numtyp term4,term5,term6; term1 = corei*corek; numtyp term1i = corek*vali; numtyp term2i = corek*dir; numtyp term3i = corek*qir; numtyp term1k = corei*valk; numtyp term2k = -corei*dkr; numtyp term3k = corei*qkr; numtyp term1ik = vali*valk; numtyp term2ik = valk*dir - vali*dkr + dik; numtyp term3ik = vali*qkr + valk*qir - dir*dkr + 2.0*(dkqi-diqk+qiqk); numtyp term4ik = dir*qkr - dkr*qir - 4.0*qik; numtyp term5ik = qir*qkr; numtyp dmpi[9],dmpj[9]; numtyp dmpij[11]; damppole(r,11,alphai,alphak,dmpi,dmpj,dmpij); numtyp scalek = factor_mpole; numtyp rr1i = bn[0] - ((numtyp)1.0-scalek*dmpi[0])*rr1; numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3; numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5; numtyp rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7; numtyp rr1k = bn[0] - ((numtyp)1.0-scalek*dmpj[0])*rr1; numtyp rr3k = bn[1] - ((numtyp)1.0-scalek*dmpj[2])*rr3; numtyp rr5k = bn[2] - ((numtyp)1.0-scalek*dmpj[4])*rr5; numtyp rr7k = bn[3] - ((numtyp)1.0-scalek*dmpj[6])*rr7; numtyp rr1ik = bn[0] - ((numtyp)1.0-scalek*dmpij[0])*rr1; numtyp rr3ik = bn[1] - ((numtyp)1.0-scalek*dmpij[2])*rr3; numtyp rr5ik = bn[2] - ((numtyp)1.0-scalek*dmpij[4])*rr5; numtyp rr7ik = bn[3] - ((numtyp)1.0-scalek*dmpij[6])*rr7; numtyp rr9ik = bn[4] - ((numtyp)1.0-scalek*dmpij[8])*rr9; numtyp rr11ik = bn[5] - ((numtyp)1.0-scalek*dmpij[10])*rr11; rr1 = bn[0] - ((numtyp)1.0-scalek)*rr1; rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3; numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik + term1i*rr1i + term1k*rr1k + term1ik*rr1ik + term2i*rr3i + term2k*rr3k + term2ik*rr3ik + term3i*rr5i + term3k*rr5k + term3ik*rr5ik; // find damped multipole intermediates for force and torque numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + term1i*rr3i + term1k*rr3k + term1ik*rr3ik + term2i*rr5i + term2k*rr5k + term2ik*rr5ik + term3i*rr7i + term3k*rr7k + term3ik*rr7ik; term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik; term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik; term3 = (numtyp)2.0 * rr5ik; term4 = (numtyp)-2.0 * (corek*rr5i+valk*rr5ik - dkr*rr7ik+qkr*rr9ik); term5 = (numtyp)-2.0 * (corei*rr5k+vali*rr5ik + dir*rr7ik+qir*rr9ik); term6 = (numtyp)4.0 * rr7ik; rr3 = rr3ik; energy += e; // compute the force components for this interaction numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) + term4*qix + term5*qkx + term6*(qixk+qkxi); numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) + term4*qiy + term5*qky + term6*(qiyk+qkyi); numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + term4*qiz + term5*qkz + term6*(qizk+qkzi); // compute the torque components for this interaction numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) - term4*qirx - term6*(qikrx+qikx); numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) - term4*qiry - term6*(qikry+qiky); numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) - term4*qirz - term6*(qikrz+qikz); // increment force-based gradient and torque on first site f.x -= frcx; f.y -= frcy; f.z -= frcz; tq.x += ttmix; tq.y += ttmiy; tq.z += ttmiz; if (EVFLAG && vflag) { numtyp vxx = -xr * frcx; numtyp vxy = (numtyp)-0.5 * (yr*frcx+xr*frcy); numtyp vxz = (numtyp)-0.5 * (zr*frcx+xr*frcz); numtyp vyy = -yr * frcy; numtyp vyz = (numtyp)-0.5 * (zr*frcy+yr*frcz); numtyp vzz = -zr * frcz; virial[0] -= vxx; virial[1] -= vyy; virial[2] -= vzz; virial[3] -= vxy; virial[4] -= vxz; virial[5] -= vyz; } } // nbor } // ii (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald); for ( ; nbor (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald); for ( ; nbor (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald); for (m = 1; m <= 4; m++) { numtyp bfac = (numtyp) (m+m-1); alsq2n = alsq2 * alsq2n; bn[m] = (bfac*bn[m-1]+alsq2n*exp2a) * r2inv; } for (m = 0; m < 5; m++) bn[m] *= felec; // apply charge penetration damping to scale factors numtyp corek = coeff_amclass[jtype].z; // pcore[jclass]; numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass]; numtyp valk = polar6[j].x; numtyp dmpi[9],dmpk[9]; numtyp dmpik[9]; damppole(r,9,alphai,alphak,dmpi,dmpk,dmpik); numtyp rr3core = bn[1] - ((numtyp)1.0-factor_dscale)*rr3; numtyp rr5core = bn[2] - ((numtyp)1.0-factor_dscale)*rr5; numtyp rr3i = bn[1] - ((numtyp)1.0-factor_dscale*dmpi[2])*rr3; numtyp rr5i = bn[2] - ((numtyp)1.0-factor_dscale*dmpi[4])*rr5; numtyp rr7i = bn[3] - ((numtyp)1.0-factor_dscale*dmpi[6])*rr7; numtyp rr9i = bn[4] - ((numtyp)1.0-factor_dscale*dmpi[8])*rr9; numtyp rr3k = bn[1] - ((numtyp)1.0-factor_dscale*dmpk[2])*rr3; numtyp rr5k = bn[2] - ((numtyp)1.0-factor_dscale*dmpk[4])*rr5; numtyp rr7k = bn[3] - ((numtyp)1.0-factor_dscale*dmpk[6])*rr7; numtyp rr9k = bn[4] - ((numtyp)1.0-factor_dscale*dmpk[8])*rr9; numtyp rr5ik = bn[2] - ((numtyp)1.0-factor_wscale*dmpik[4])*rr5; numtyp rr7ik = bn[3] - ((numtyp)1.0-factor_wscale*dmpik[6])*rr7; // get the induced dipole field used for dipole torques numtyp tix3 = (numtyp)2.0*rr3i*ukx; numtyp tiy3 = (numtyp)2.0*rr3i*uky; numtyp tiz3 = (numtyp)2.0*rr3i*ukz; numtyp tuir = (numtyp)-2.0*rr5i*ukr; ufld[0] += tix3 + xr*tuir; ufld[1] += tiy3 + yr*tuir; ufld[2] += tiz3 + zr*tuir; // get induced dipole field gradient used for quadrupole torques numtyp tix5 = (numtyp)4.0 * (rr5i*ukx); numtyp tiy5 = (numtyp)4.0 * (rr5i*uky); numtyp tiz5 = (numtyp)4.0 * (rr5i*ukz); tuir = (numtyp)-2.0*rr7i*ukr; dufld[0] += xr*tix5 + xr*xr*tuir; dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir; dufld[2] += yr*tiy5 + yr*yr*tuir; dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir; dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir; dufld[5] += zr*tiz5 + zr*zr*tuir; // get the field gradient for direct polarization force numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i; numtyp term1k,term2k,term3k,term4k,term5k,term6k,term7k,term8k; numtyp term1core; numtyp tixx,tiyy,tizz,tixy,tixz,tiyz; numtyp tkxx,tkyy,tkzz,tkxy,tkxz,tkyz; term1i = rr3i - rr5i*xr*xr; term1core = rr3core - rr5core*xr*xr; term2i = (numtyp)2.0*rr5i*xr ; term3i = rr7i*xr*xr - rr5i; term4i = (numtyp)2.0*rr5i; term5i = (numtyp)5.0*rr7i*xr; term6i = rr9i*xr*xr; term1k = rr3k - rr5k*xr*xr; term2k = (numtyp)2.0*rr5k*xr; term3k = rr7k*xr*xr - rr5k; term4k = (numtyp)2.0*rr5k; term5k = (numtyp)5.0*rr7k*xr; term6k = rr9k*xr*xr; tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i - qixx*term4i + qix*term5i - qir*term6i + (qiy*yr+qiz*zr)*rr7i; tkxx = valk*term1k + corek*term1core - dkx*term2k + dkr*term3k - qkxx*term4k + qkx*term5k - qkr*term6k + (qky*yr+qkz*zr)*rr7k; term1i = rr3i - rr5i*yr*yr; term1core = rr3core - rr5core*yr*yr; term2i = (numtyp)2.0*rr5i*yr; term3i = rr7i*yr*yr - rr5i; term4i = (numtyp)2.0*rr5i; term5i = (numtyp)5.0*rr7i*yr; term6i = rr9i*yr*yr; term1k = rr3k - rr5k*yr*yr; term2k = (numtyp)2.0*rr5k*yr; term3k = rr7k*yr*yr - rr5k; term4k = (numtyp)2.0*rr5k; term5k = (numtyp)5.0*rr7k*yr; term6k = rr9k*yr*yr; tiyy = vali*term1i + corei*term1core + diy*term2i - dir*term3i - qiyy*term4i + qiy*term5i - qir*term6i + (qix*xr+qiz*zr)*rr7i; tkyy = valk*term1k + corek*term1core - dky*term2k + dkr*term3k - qkyy*term4k + qky*term5k - qkr*term6k + (qkx*xr+qkz*zr)*rr7k; term1i = rr3i - rr5i*zr*zr; term1core = rr3core - rr5core*zr*zr; term2i = (numtyp)2.0*rr5i*zr; term3i = rr7i*zr*zr - rr5i; term4i = (numtyp)2.0*rr5i; term5i = (numtyp)5.0*rr7i*zr; term6i = rr9i*zr*zr; term1k = rr3k - rr5k*zr*zr; term2k = (numtyp)2.0*rr5k*zr; term3k = rr7k*zr*zr - rr5k; term4k = (numtyp)2.0*rr5k; term5k = (numtyp)5.0*rr7k*zr; term6k = rr9k*zr*zr; tizz = vali*term1i + corei*term1core + diz*term2i - dir*term3i - qizz*term4i + qiz*term5i - qir*term6i + (qix*xr+qiy*yr)*rr7i; tkzz = valk*term1k + corek*term1core - dkz*term2k + dkr*term3k - qkzz*term4k + qkz*term5k - qkr*term6k + (qkx*xr+qky*yr)*rr7k; term2i = rr5i*xr ; term1i = yr * term2i; term1core = rr5core*xr*yr; term3i = rr5i*yr; term4i = yr * (rr7i*xr); term5i = (numtyp)2.0*rr5i; term6i = (numtyp)2.0*rr7i*xr; term7i = (numtyp)2.0*rr7i*yr; term8i = yr*rr9i*xr; term2k = rr5k*xr; term1k = yr * term2k; term3k = rr5k*yr; term4k = yr * (rr7k*xr); term5k = (numtyp)2.0*rr5k; term6k = (numtyp)2.0*rr7k*xr; term7k = (numtyp)2.0*rr7k*yr; term8k = yr*rr9k*xr; tixy = -vali*term1i - corei*term1core + diy*term2i + dix*term3i - dir*term4i - qixy*term5i + qiy*term6i + qix*term7i - qir*term8i; tkxy = -valk*term1k - corek*term1core - dky*term2k - dkx*term3k + dkr*term4k - qkxy*term5k + qky*term6k + qkx*term7k - qkr*term8k; term2i = rr5i*xr; term1i = zr * term2i; term1core = rr5core*xr*zr; term3i = rr5i*zr; term4i = zr * (rr7i*xr); term5i = (numtyp)2.0*rr5i; term6i = (numtyp)2.0*rr7i*xr; term7i = (numtyp)2.0*rr7i*zr; term8i = zr*rr9i*xr; term2k = rr5k*xr; term1k = zr * term2k; term3k = rr5k*zr; term4k = zr * (rr7k*xr); term5k = (numtyp)2.0*rr5k; term6k = (numtyp)2.0*rr7k*xr; term7k = (numtyp)2.0*rr7k*zr; term8k = zr*rr9k*xr; tixz = -vali*term1i - corei*term1core + diz*term2i + dix*term3i - dir*term4i - qixz*term5i + qiz*term6i + qix*term7i - qir*term8i; tkxz = -valk*term1k - corek*term1core - dkz*term2k - dkx*term3k + dkr*term4k - qkxz*term5k + qkz*term6k + qkx*term7k - qkr*term8k; term2i = rr5i*yr; term1i = zr * term2i; term1core = rr5core*yr*zr; term3i = rr5i*zr; term4i = zr * (rr7i*yr); term5i = (numtyp)2.0*rr5i; term6i = (numtyp)2.0*rr7i*yr; term7i = (numtyp)2.0*rr7i*zr; term8i = zr*rr9i*yr; term2k = rr5k*yr; term1k = zr * term2k; term3k = rr5k*zr; term4k = zr * (rr7k*yr); term5k = (numtyp)2.0*rr5k; term6k = (numtyp)2.0*rr7k*yr; term7k = (numtyp)2.0*rr7k*zr; term8k = zr*rr9k*yr; tiyz = -vali*term1i - corei*term1core + diz*term2i + diy*term3i - dir*term4i - qiyz*term5i + qiz*term6i + qiy*term7i - qir*term8i; tkyz = -valk*term1k - corek*term1core - dkz*term2k - dky*term3k + dkr*term4k - qkyz*term5k + qkz*term6k + qky*term7k - qkr*term8k; numtyp depx = tixx*ukx + tixy*uky + tixz*ukz - tkxx*uix - tkxy*uiy - tkxz*uiz; numtyp depy = tixy*ukx + tiyy*uky + tiyz*ukz - tkxy*uix - tkyy*uiy - tkyz*uiz; numtyp depz = tixz*ukx + tiyz*uky + tizz*ukz - tkxz*uix - tkyz*uiy - tkzz*uiz; numtyp frcx = (numtyp)-2.0 * depx; numtyp frcy = (numtyp)-2.0 * depy; numtyp frcz = (numtyp)-2.0 * depz; numtyp term1,term2,term3; // get the dEp/dR terms used for direct polarization force // poltyp == MUTUAL && hippo // tixx and tkxx term1 = (numtyp)2.0 * rr5ik; term2 = term1*xr; term3 = rr5ik - rr7ik*xr*xr; tixx = uix*term2 + uir*term3; tkxx = ukx*term2 + ukr*term3; // tiyy and tkyy term2 = term1*yr; term3 = rr5ik - rr7ik*yr*yr; tiyy = uiy*term2 + uir*term3; tkyy = uky*term2 + ukr*term3; // tiz and tkzz term2 = term1*zr; term3 = rr5ik - rr7ik*zr*zr; tizz = uiz*term2 + uir*term3; tkzz = ukz*term2 + ukr*term3; // tixy and tkxy term1 = rr5ik*yr; term2 = rr5ik*xr; term3 = yr * (rr7ik*xr); tixy = uix*term1 + uiy*term2 - uir*term3; tkxy = ukx*term1 + uky*term2 - ukr*term3; // tixx and tkxx term1 = rr5ik * zr; term3 = zr * (rr7ik*xr); tixz = uix*term1 + uiz*term2 - uir*term3; tkxz = ukx*term1 + ukz*term2 - ukr*term3; // tiyz and tkyz term2 = rr5ik*yr; term3 = zr * (rr7ik*yr); tiyz = uiy*term1 + uiz*term2 - uir*term3; tkyz = uky*term1 + ukz*term2 - ukr*term3; depx = tixx*ukxp + tixy*ukyp + tixz*ukzp + tkxx*uixp + tkxy*uiyp + tkxz*uizp; depy = tixy*ukxp + tiyy*ukyp + tiyz*ukzp + tkxy*uixp + tkyy*uiyp + tkyz*uizp; depz = tixz*ukxp + tiyz*ukyp + tizz*ukzp + tkxz*uixp + tkyz*uiyp + tkzz*uizp; frcx = frcx - depx; frcy = frcy - depy; frcz = frcz - depz; f.x += frcx; f.y += frcy; f.z += frcz; if (EVFLAG && vflag) { numtyp vxx = xr * frcx; numtyp vxy = (numtyp)0.5 * (yr*frcx+xr*frcy); numtyp vxz = (numtyp)0.5 * (zr*frcx+xr*frcz); numtyp vyy = yr * frcy; numtyp vyz = (numtyp)0.5 * (zr*frcy+yr*frcz); numtyp vzz = zr * frcz; virial[0] -= vxx; virial[1] -= vyy; virial[2] -= vzz; virial[3] -= vxy; virial[4] -= vxz; virial[5] -= vyz; } } // nbor } // ii> SBBITS & 3; j = sj & NEIGHMASK; tagint jtag = tag[j]; if (!which) { int offset=ii; for (int k=0; k