// ************************************************************************** // amoeba.cu // ------------------- // Trung Dac Nguyen (Northwestern) // // Device code for acceleration of the amoeba pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : trung.nguyen@northwestern.edu // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifdef LAMMPS_SMALLBIG #define tagint int #endif #ifdef LAMMPS_BIGBIG #include "inttypes.h" #define tagint int64_t #endif #ifdef LAMMPS_SMALLSMALL #define tagint int #endif #ifndef _DOUBLE_DOUBLE _texture( pos_tex,float4); _texture( q_tex,float); #else _texture_2d( pos_tex,int4); _texture( q_tex,int2); #endif #else #define pos_tex x_ #define q_tex q_ #ifdef LAMMPS_SMALLBIG #define tagint int #endif #ifdef LAMMPS_BIGBIG #define tagint long #endif #ifdef LAMMPS_SMALLSMALL #define tagint int #endif #endif // defined(NV_KERNEL) || defined(USE_HIP) #if (SHUFFLE_AVAIL == 0) #define local_allocate_store_ufld() \ __local acctyp red_acc[6][BLOCK_PAIR]; #define store_answers_amoeba_tq(tq, ii, inum,tid, t_per_atom, offset, i, \ tep) \ if (t_per_atom>1) { \ red_acc[0][tid]=tq.x; \ red_acc[1][tid]=tq.y; \ red_acc[2][tid]=tq.z; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<3; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ tq.x=red_acc[0][tid]; \ tq.y=red_acc[1][tid]; \ tq.z=red_acc[2][tid]; \ } \ if (offset==0 && ii1) { \ red_acc[0][tid]=ufld[0]; \ red_acc[1][tid]=ufld[1]; \ red_acc[2][tid]=ufld[2]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<3; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ ufld[0]=red_acc[0][tid]; \ ufld[1]=red_acc[1][tid]; \ ufld[2]=red_acc[2][tid]; \ red_acc[0][tid]=dufld[0]; \ red_acc[1][tid]=dufld[1]; \ red_acc[2][tid]=dufld[2]; \ red_acc[3][tid]=dufld[3]; \ red_acc[4][tid]=dufld[4]; \ red_acc[5][tid]=dufld[5]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<6; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ dufld[0]=red_acc[0][tid]; \ dufld[1]=red_acc[1][tid]; \ dufld[2]=red_acc[2][tid]; \ dufld[3]=red_acc[3][tid]; \ dufld[4]=red_acc[4][tid]; \ dufld[5]=red_acc[5][tid]; \ } \ if (offset==0 && ii1) { \ red_acc[0][tid]=_fieldp[0]; \ red_acc[1][tid]=_fieldp[1]; \ red_acc[2][tid]=_fieldp[2]; \ red_acc[3][tid]=_fieldp[3]; \ red_acc[4][tid]=_fieldp[4]; \ red_acc[5][tid]=_fieldp[5]; \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ simdsync(); \ if (offset < s) { \ for (int r=0; r<6; r++) \ red_acc[r][tid] += red_acc[r][tid+s]; \ } \ } \ _fieldp[0]=red_acc[0][tid]; \ _fieldp[1]=red_acc[1][tid]; \ _fieldp[2]=red_acc[2][tid]; \ _fieldp[3]=red_acc[3][tid]; \ _fieldp[4]=red_acc[4][tid]; \ _fieldp[5]=red_acc[5][tid]; \ } \ if (offset==0 && ii1) { \ simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \ if (EVFLAG && (vflag==2 || eflag==2)) { \ if (eflag) { \ simdsync(); \ simd_reduce_add2(t_per_atom, red_acc, offset, tid, energy, e_coul); \ } \ if (vflag) { \ simdsync(); \ simd_reduce_arr(6, t_per_atom, red_acc, offset, tid, virial); \ } \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ tq.x += shfl_down(tq.x, s, t_per_atom); \ tq.y += shfl_down(tq.y, s, t_per_atom); \ tq.z += shfl_down(tq.z, s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ ufld[0] += shfl_down(ufld[0], s, t_per_atom); \ ufld[1] += shfl_down(ufld[1], s, t_per_atom); \ ufld[2] += shfl_down(ufld[2], s, t_per_atom); \ dufld[0] += shfl_down(dufld[0], s, t_per_atom); \ dufld[1] += shfl_down(dufld[1], s, t_per_atom); \ dufld[2] += shfl_down(dufld[2], s, t_per_atom); \ dufld[3] += shfl_down(dufld[3], s, t_per_atom); \ dufld[4] += shfl_down(dufld[4], s, t_per_atom); \ dufld[5] += shfl_down(dufld[5], s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ _fieldp[0] += shfl_down(_fieldp[0], s, t_per_atom); \ _fieldp[1] += shfl_down(_fieldp[1], s, t_per_atom); \ _fieldp[2] += shfl_down(_fieldp[2], s, t_per_atom); \ _fieldp[3] += shfl_down(_fieldp[3], s, t_per_atom); \ _fieldp[4] += shfl_down(_fieldp[4], s, t_per_atom); \ _fieldp[5] += shfl_down(_fieldp[5], s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (vflag==2 || eflag==2) { \ if (eflag) \ simd_reduce_add2(t_per_atom,energy,e_coul); \ if (vflag) \ simd_reduce_arr(6, t_per_atom,virial); \ } \ } \ if (offset==0 && ii 1; active_subgs /= vwidth) { \ if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \ if (bnum < active_subgs) { \ if (eflag) { \ simd_reduce_add2(vwidth, energy, e_coul); \ if (voffset==0) { \ red_acc[6][bnum] = energy; \ red_acc[7][bnum] = e_coul; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (voffset==0) \ for (int r=0; r<6; r++) red_acc[r][bnum]=virial[r]; \ } \ } \ \ __syncthreads(); \ if (tid < active_subgs) { \ if (eflag) { \ energy = red_acc[6][tid]; \ e_coul = red_acc[7][tid]; \ } \ if (vflag) \ for (int r = 0; r < 6; r++) virial[r] = red_acc[r][tid]; \ } else { \ if (eflag) energy = e_coul = (acctyp)0; \ if (vflag) for (int r = 0; r < 6; r++) virial[r] = (acctyp)0; \ } \ } \ \ if (bnum == 0) { \ int ei=BLOCK_ID_X; \ if (eflag) { \ simd_reduce_add2(vwidth, energy, e_coul); \ if (tid==0) { \ engv[ei]+=energy*(acctyp)0.5; \ ei+=ev_stride; \ engv[ei]+=e_coul*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ if (vflag) { \ simd_reduce_arr(6, vwidth, virial); \ if (tid==0) { \ for (int r=0; r<6; r++) { \ engv[ei]+=virial[r]*(acctyp)0.5; \ ei+=ev_stride; \ } \ } \ } \ } \ } else if (offset==0 && ii1) \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ if (offset==0 && ii (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald); int m; for (m = 1; m < 6; m++) { numtyp bfac = (numtyp) (m+m-1); alsq2n = alsq2 * alsq2n; bn[m] = (bfac*bn[m-1]+alsq2n*exp2a) * r2inv; } for (m = 0; m < 6; m++) bn[m] *= felec; numtyp term1,term2,term3; numtyp term4,term5,term6; term1 = ci*ck; term2 = ck*dir - ci*dkr + dik; term3 = ci*qkr + ck*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk); term4 = dir*qkr - dkr*qir - (numtyp)4.0*qik; term5 = qir*qkr; numtyp scalek = (numtyp)1.0 - factor_mpole; rr1 = bn[0] - scalek*rr1; rr3 = bn[1] - scalek*rr3; rr5 = bn[2] - scalek*rr5; rr7 = bn[3] - scalek*rr7; rr9 = bn[4] - scalek*rr9; rr11 = bn[5] - scalek*rr11; numtyp e = term1*rr1 + term2*rr3 + term3*rr5 + term4*rr7 + term5*rr9; // find standard multipole intermediates for force and torque numtyp de = term1*rr3 + term2*rr5 + term3*rr7 + term4*rr9 + term5*rr11; term1 = -ck*rr3 + dkr*rr5 - qkr*rr7; term2 = ci*rr3 + dir*rr5 + qir*rr7; term3 = (numtyp)2.0 * rr5; term4 = (numtyp)2.0 * (-ck*rr5+dkr*rr7-qkr*rr9); term5 = (numtyp)2.0 * (-ci*rr5-dir*rr7-qir*rr9); term6 = (numtyp)4.0 * rr7; energy += e; // compute the force components for this interaction numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) + term4*qix + term5*qkx + term6*(qixk+qkxi); numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) + term4*qiy + term5*qky + term6*(qiyk+qkyi); numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + term4*qiz + term5*qkz + term6*(qizk+qkzi); // compute the torque components for this interaction numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) - term4*qirx - term6*(qikrx+qikx); numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) - term4*qiry - term6*(qikry+qiky); numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) - term4*qirz - term6*(qikrz+qikz); // increment force-based gradient and torque on first site f.x -= frcx; f.y -= frcy; f.z -= frcz; tq.x += ttmix; tq.y += ttmiy; tq.z += ttmiz; if (EVFLAG && vflag) { numtyp vxx = -xr * frcx; numtyp vxy = (numtyp)-0.5 * (yr*frcx+xr*frcy); numtyp vxz = (numtyp)-0.5 * (zr*frcx+xr*frcz); numtyp vyy = -yr * frcy; numtyp vyz = (numtyp)-0.5 * (zr*frcy+yr*frcz); numtyp vzz = -zr * frcz; virial[0] -= vxx; virial[1] -= vyy; virial[2] -= vzz; virial[3] -= vxy; virial[4] -= vxz; virial[5] -= vyz; } } // nbor } // ii (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald); for ( ; nbor (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald); for ( ; nbor (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald); for (m = 1; m <= 4; m++) { numtyp bfac = (numtyp) (m+m-1); alsq2n = alsq2 * alsq2n; bn[m] = (bfac*bn[m-1]+alsq2n*exp2a) * r2inv; } for (m = 0; m < 5; m++) bn[m] *= felec; // apply Thole polarization damping to scale factors numtyp sc3 = (numtyp)1.0; numtyp sc5 = (numtyp)1.0; numtyp sc7 = (numtyp)1.0; for (k = 0; k < 3; k++) { rc3[k] = (numtyp)0.0; rc5[k] = (numtyp)0.0; rc7[k] = (numtyp)0.0; } // apply Thole polarization damping to scale factors numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype] if (damp != (numtyp)0.0) { numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype] numtyp tmp = r*ucl_recip(damp); damp = pgamma * (tmp*tmp*tmp); if (damp < (numtyp)50.0) { numtyp expdamp = ucl_exp(-damp); sc3 = (numtyp)1.0 - expdamp; sc5 = (numtyp)1.0 - ((numtyp)1.0+damp)*expdamp; sc7 = (numtyp)1.0 - ((numtyp)1.0+damp+(numtyp)0.6*damp*damp) * expdamp; numtyp temp3 = (numtyp)3.0 * damp * expdamp * r2inv; numtyp temp5 = damp; numtyp temp7 = (numtyp)-0.2 + (numtyp)0.6*damp; rc3[0] = xr * temp3; rc3[1] = yr * temp3; rc3[2] = zr * temp3; rc5[0] = rc3[0] * temp5; rc5[1] = rc3[1] * temp5; rc5[2] = rc3[2] * temp5; rc7[0] = rc5[0] * temp7; rc7[1] = rc5[1] * temp7; rc7[2] = rc5[2] * temp7; } psc3 = (numtyp)1.0 - sc3*factor_pscale; psc5 = (numtyp)1.0 - sc5*factor_pscale; psc7 = (numtyp)1.0 - sc7*factor_pscale; dsc3 = (numtyp)1.0 - sc3*factor_dscale; dsc5 = (numtyp)1.0 - sc5*factor_dscale; dsc7 = (numtyp)1.0 - sc7*factor_dscale; usc3 = (numtyp)1.0 - sc3*factor_uscale; usc5 = (numtyp)1.0 - sc5*factor_uscale; psr3 = bn[1] - psc3*rr3; psr5 = bn[2] - psc5*rr5; psr7 = bn[3] - psc7*rr7; dsr3 = bn[1] - dsc3*rr3; dsr5 = bn[2] - dsc5*rr5; dsr7 = bn[3] - dsc7*rr7; usr5 = bn[2] - usc5*rr5; for (k = 0; k < 3; k++) { prc3[k] = rc3[k] * factor_pscale; prc5[k] = rc5[k] * factor_pscale; prc7[k] = rc7[k] * factor_pscale; drc3[k] = rc3[k] * factor_dscale; drc5[k] = rc5[k] * factor_dscale; drc7[k] = rc7[k] * factor_dscale; urc3[k] = rc3[k] * factor_uscale; urc5[k] = rc5[k] * factor_uscale; } } else { // damp == 0: ??? } // get the induced dipole field used for dipole torques numtyp tix3 = psr3*ukx + dsr3*ukxp; numtyp tiy3 = psr3*uky + dsr3*ukyp; numtyp tiz3 = psr3*ukz + dsr3*ukzp; numtyp tuir = -psr5*ukr - dsr5*ukrp; ufld[0] += tix3 + xr*tuir; ufld[1] += tiy3 + yr*tuir; ufld[2] += tiz3 + zr*tuir; // get induced dipole field gradient used for quadrupole torques numtyp tix5 = (numtyp)2.0 * (psr5*ukx+dsr5*ukxp); numtyp tiy5 = (numtyp)2.0 * (psr5*uky+dsr5*ukyp); numtyp tiz5 = (numtyp)2.0 * (psr5*ukz+dsr5*ukzp); tuir = -psr7*ukr - dsr7*ukrp; dufld[0] += xr*tix5 + xr*xr*tuir; dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir; dufld[2] += yr*tiy5 + yr*yr*tuir; dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir; dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir; dufld[5] += zr*tiz5 + zr*zr*tuir; // get the dEd/dR terms used for direct polarization force term1 = bn[2] - dsc3*rr5; term2 = bn[3] - dsc5*rr7; term3 = -dsr3 + term1*xr*xr - rr3*xr*drc3[0]; term4 = rr3*drc3[0] - term1*xr - dsr5*xr; term5 = term2*xr*xr - dsr5 - rr5*xr*drc5[0]; term6 = (bn[4]-dsc7*rr9)*xr*xr - bn[3] - rr7*xr*drc7[0]; term7 = rr5*drc5[0] - (numtyp)2.0*bn[3]*xr + (dsc5+(numtyp)1.5*dsc7)*rr7*xr; numtyp tixx = ci*term3 + dix*term4 + dir*term5 + (numtyp)2.0*dsr5*qixx + (qiy*yr+qiz*zr)*dsc7*rr7 + (numtyp)2.0*qix*term7 + qir*term6; numtyp tkxx = ck*term3 - dkx*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkxx + (qky*yr+qkz*zr)*dsc7*rr7 + (numtyp)2.0*qkx*term7 + qkr*term6; term3 = -dsr3 + term1*yr*yr - rr3*yr*drc3[1]; term4 = rr3*drc3[1] - term1*yr - dsr5*yr; term5 = term2*yr*yr - dsr5 - rr5*yr*drc5[1]; term6 = (bn[4]-dsc7*rr9)*yr*yr - bn[3] - rr7*yr*drc7[1]; term7 = rr5*drc5[1] - (numtyp)2.0*bn[3]*yr + (dsc5+(numtyp)1.5*dsc7)*rr7*yr; numtyp tiyy = ci*term3 + diy*term4 + dir*term5 + (numtyp)2.0*dsr5*qiyy + (qix*xr+qiz*zr)*dsc7*rr7 + (numtyp)2.0*qiy*term7 + qir*term6; numtyp tkyy = ck*term3 - dky*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkyy + (qkx*xr+qkz*zr)*dsc7*rr7 + (numtyp)2.0*qky*term7 + qkr*term6; term3 = -dsr3 + term1*zr*zr - rr3*zr*drc3[2]; term4 = rr3*drc3[2] - term1*zr - dsr5*zr; term5 = term2*zr*zr - dsr5 - rr5*zr*drc5[2]; term6 = (bn[4]-dsc7*rr9)*zr*zr - bn[3] - rr7*zr*drc7[2]; term7 = rr5*drc5[2] - (numtyp)2.0*bn[3]*zr + (dsc5+(numtyp)1.5*dsc7)*rr7*zr; numtyp tizz = ci*term3 + diz*term4 + dir*term5 + (numtyp)2.0*dsr5*qizz + (qix*xr+qiy*yr)*dsc7*rr7 + (numtyp)2.0*qiz*term7 + qir*term6; numtyp tkzz = ck*term3 - dkz*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkzz + (qkx*xr+qky*yr)*dsc7*rr7 + (numtyp)2.0*qkz*term7 + qkr*term6; term3 = term1*xr*yr - rr3*yr*drc3[0]; term4 = rr3*drc3[0] - term1*xr; term5 = term2*xr*yr - rr5*yr*drc5[0]; term6 = (bn[4]-dsc7*rr9)*xr*yr - rr7*yr*drc7[0]; term7 = rr5*drc5[0] - term2*xr; numtyp tixy = ci*term3 - dsr5*dix*yr + diy*term4 + dir*term5 + (numtyp)2.0*dsr5*qixy - (numtyp)2.0*dsr7*yr*qix + (numtyp)2.0*qiy*term7 + qir*term6; numtyp tkxy = ck*term3 + dsr5*dkx*yr - dky*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkxy - (numtyp)2.0*dsr7*yr*qkx +(numtyp) 2.0*qky*term7 + qkr*term6; term3 = term1*xr*zr - rr3*zr*drc3[0]; term5 = term2*xr*zr - rr5*zr*drc5[0]; term6 = (bn[4]-dsc7*rr9)*xr*zr - rr7*zr*drc7[0]; numtyp tixz = ci*term3 - dsr5*dix*zr + diz*term4 + dir*term5 + (numtyp)2.0*dsr5*qixz - (numtyp)2.0*dsr7*zr*qix + (numtyp)2.0*qiz*term7 + qir*term6; numtyp tkxz = ck*term3 + dsr5*dkx*zr - dkz*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkxz - (numtyp)2.0*dsr7*zr*qkx + (numtyp)2.0*qkz*term7 + qkr*term6; term3 = term1*yr*zr - rr3*zr*drc3[1]; term4 = rr3*drc3[1] - term1*yr; term5 = term2*yr*zr - rr5*zr*drc5[1]; term6 = (bn[4]-dsc7*rr9)*yr*zr - rr7*zr*drc7[1]; term7 = rr5*drc5[1] - term2*yr; numtyp tiyz = ci*term3 - dsr5*diy*zr + diz*term4 + dir*term5 + (numtyp)2.0*dsr5*qiyz - (numtyp)2.0*dsr7*zr*qiy + (numtyp)2.0*qiz*term7 + qir*term6; numtyp tkyz = ck*term3 + dsr5*dky*zr - dkz*term4 - dkr*term5 + (numtyp)2.0*dsr5*qkyz - (numtyp)2.0*dsr7*zr*qky + (numtyp)2.0*qkz*term7 + qkr*term6; numtyp depx = tixx*ukxp + tixy*ukyp + tixz*ukzp - tkxx*uixp - tkxy*uiyp - tkxz*uizp; numtyp depy = tixy*ukxp + tiyy*ukyp + tiyz*ukzp - tkxy*uixp - tkyy*uiyp - tkyz*uizp; numtyp depz = tixz*ukxp + tiyz*ukyp + tizz*ukzp - tkxz*uixp - tkyz*uiyp - tkzz*uizp; numtyp frcx = depx; numtyp frcy = depy; numtyp frcz = depz; // get the dEp/dR terms used for direct polarization force // tixx and tkxx term1 = bn[2] - psc3*rr5; term2 = bn[3] - psc5*rr7; term3 = -psr3 + term1*xr*xr - rr3*xr*prc3[0]; term4 = rr3*prc3[0] - term1*xr - psr5*xr; term5 = term2*xr*xr - psr5 - rr5*xr*prc5[0]; term6 = (bn[4]-psc7*rr9)*xr*xr - bn[3] - rr7*xr*prc7[0]; term7 = rr5*prc5[0] - (numtyp)2.0*bn[3]*xr + (psc5+(numtyp)1.5*psc7)*rr7*xr; tixx = ci*term3 + dix*term4 + dir*term5 + (numtyp)2.0*psr5*qixx + (qiy*yr+qiz*zr)*psc7*rr7 + (numtyp)2.0*qix*term7 + qir*term6; tkxx = ck*term3 - dkx*term4 - dkr*term5 + (numtyp)2.0*psr5*qkxx + (qky*yr+qkz*zr)*psc7*rr7 + (numtyp)2.0*qkx*term7 + qkr*term6; // tiyy and tkyy term3 = -psr3 + term1*yr*yr - rr3*yr*prc3[1]; term4 = rr3*prc3[1] - term1*yr - psr5*yr; term5 = term2*yr*yr - psr5 - rr5*yr*prc5[1]; term6 = (bn[4]-psc7*rr9)*yr*yr - bn[3] - rr7*yr*prc7[1]; term7 = rr5*prc5[1] - (numtyp)2.0*bn[3]*yr + (psc5+(numtyp)1.5*psc7)*rr7*yr; tiyy = ci*term3 + diy*term4 + dir*term5 + (numtyp)2.0*psr5*qiyy + (qix*xr+qiz*zr)*psc7*rr7 + (numtyp)2.0*qiy*term7 + qir*term6; tkyy = ck*term3 - dky*term4 - dkr*term5 + (numtyp)2.0*psr5*qkyy + (qkx*xr+qkz*zr)*psc7*rr7 + (numtyp)2.0*qky*term7 + qkr*term6; // tizz and tkzz term3 = -psr3 + term1*zr*zr - rr3*zr*prc3[2]; term4 = rr3*prc3[2] - term1*zr - psr5*zr; term5 = term2*zr*zr - psr5 - rr5*zr*prc5[2]; term6 = (bn[4]-psc7*rr9)*zr*zr - bn[3] - rr7*zr*prc7[2]; term7 = rr5*prc5[2] - (numtyp)2.0*bn[3]*zr + (psc5+(numtyp)1.5*psc7)*rr7*zr; tizz = ci*term3 + diz*term4 + dir*term5 + (numtyp)2.0*psr5*qizz + (qix*xr+qiy*yr)*psc7*rr7 + (numtyp)2.0*qiz*term7 + qir*term6; tkzz = ck*term3 - dkz*term4 - dkr*term5 + (numtyp)2.0*psr5*qkzz + (qkx*xr+qky*yr)*psc7*rr7 + (numtyp)2.0*qkz*term7 + qkr*term6; // tixy and tkxy term3 = term1*xr*yr - rr3*yr*prc3[0]; term4 = rr3*prc3[0] - term1*xr; term5 = term2*xr*yr - rr5*yr*prc5[0]; term6 = (bn[4]-psc7*rr9)*xr*yr - rr7*yr*prc7[0]; term7 = rr5*prc5[0] - term2*xr; tixy = ci*term3 - psr5*dix*yr + diy*term4 + dir*term5 + (numtyp)2.0*psr5*qixy - (numtyp)2.0*psr7*yr*qix + (numtyp)2.0*qiy*term7 + qir*term6; tkxy = ck*term3 + psr5*dkx*yr - dky*term4 - dkr*term5 + (numtyp)2.0*psr5*qkxy - (numtyp)2.0*psr7*yr*qkx + (numtyp)2.0*qky*term7 + qkr*term6; // tixz and tkxz term3 = term1*xr*zr - rr3*zr*prc3[0]; term5 = term2*xr*zr - rr5*zr*prc5[0]; term6 = (bn[4]-psc7*rr9)*xr*zr - rr7*zr*prc7[0]; tixz = ci*term3 - psr5*dix*zr + diz*term4 + dir*term5 + (numtyp)2.0*psr5*qixz - (numtyp)2.0*psr7*zr*qix + (numtyp)2.0*qiz*term7 + qir*term6; tkxz = ck*term3 + psr5*dkx*zr - dkz*term4 - dkr*term5 + (numtyp)2.0*psr5*qkxz - (numtyp)2.0*psr7*zr*qkx + (numtyp)2.0*qkz*term7 + qkr*term6; // tiyz and tkyz term3 = term1*yr*zr - rr3*zr*prc3[1]; term4 = rr3*prc3[1] - term1*yr; term5 = term2*yr*zr - rr5*zr*prc5[1]; term6 = (bn[4]-psc7*rr9)*yr*zr - rr7*zr*prc7[1]; term7 = rr5*prc5[1] - term2*yr; tiyz = ci*term3 - psr5*diy*zr + diz*term4 + dir*term5 + (numtyp)2.0*psr5*qiyz - (numtyp)2.0*psr7*zr*qiy + (numtyp)2.0*qiz*term7 + qir*term6; tkyz = ck*term3 + psr5*dky*zr - dkz*term4 - dkr*term5 + (numtyp)2.0*psr5*qkyz - (numtyp)2.0*psr7*zr*qky + (numtyp)2.0*qkz*term7 + qkr*term6; depx = tixx*ukx + tixy*uky + tixz*ukz - tkxx*uix - tkxy*uiy - tkxz*uiz; depy = tixy*ukx + tiyy*uky + tiyz*ukz - tkxy*uix - tkyy*uiy - tkyz*uiz; depz = tixz*ukx + tiyz*uky + tizz*ukz - tkxz*uix - tkyz*uiy - tkzz*uiz; frcx = frcx + depx; frcy = frcy + depy; frcz = frcz + depz; // get the dtau/dr terms used for mutual polarization force // poltyp == MUTUAL && amoeba term1 = bn[2] - usc3*rr5; term2 = bn[3] - usc5*rr7; term3 = usr5 + term1; term4 = rr3 * factor_uscale; term5 = -xr*term3 + rc3[0]*term4; term6 = -usr5 + xr*xr*term2 - rr5*xr*urc5[0]; tixx = uix*term5 + uir*term6; tkxx = ukx*term5 + ukr*term6; term5 = -yr*term3 + rc3[1]*term4; term6 = -usr5 + yr*yr*term2 - rr5*yr*urc5[1]; tiyy = uiy*term5 + uir*term6; tkyy = uky*term5 + ukr*term6; term5 = -zr*term3 + rc3[2]*term4; term6 = -usr5 + zr*zr*term2 - rr5*zr*urc5[2]; tizz = uiz*term5 + uir*term6; tkzz = ukz*term5 + ukr*term6; term4 = -usr5 * yr; term5 = -xr*term1 + rr3*urc3[0]; term6 = xr*yr*term2 - rr5*yr*urc5[0]; tixy = uix*term4 + uiy*term5 + uir*term6; tkxy = ukx*term4 + uky*term5 + ukr*term6; term4 = -usr5 * zr; term6 = xr*zr*term2 - rr5*zr*urc5[0]; tixz = uix*term4 + uiz*term5 + uir*term6; tkxz = ukx*term4 + ukz*term5 + ukr*term6; term5 = -yr*term1 + rr3*urc3[1]; term6 = yr*zr*term2 - rr5*zr*urc5[1]; tiyz = uiy*term4 + uiz*term5 + uir*term6; tkyz = uky*term4 + ukz*term5 + ukr*term6; depx = tixx*ukxp + tixy*ukyp + tixz*ukzp + tkxx*uixp + tkxy*uiyp + tkxz*uizp; depy = tixy*ukxp + tiyy*ukyp + tiyz*ukzp + tkxy*uixp + tkyy*uiyp + tkyz*uizp; depz = tixz*ukxp + tiyz*ukyp + tizz*ukzp + tkxz*uixp + tkyz*uiyp + tkzz*uizp; frcx = frcx + depx; frcy = frcy + depy; frcz = frcz + depz; f.x += frcx; f.y += frcy; f.z += frcz; if (EVFLAG && vflag) { numtyp vxx = xr * frcx; numtyp vxy = (numtyp)0.5 * (yr*frcx+xr*frcy); numtyp vxz = (numtyp)0.5 * (zr*frcx+xr*frcz); numtyp vyy = yr * frcy; numtyp vyz = (numtyp)0.5 * (zr*frcy+yr*frcz); numtyp vzz = zr * frcz; virial[0] -= vxx; virial[1] -= vyy; virial[2] -= vzz; virial[3] -= vxy; virial[4] -= vxz; virial[5] -= vyz; } } // nbor } // ii> SBBITS & 3; int j = sj & NEIGHMASK; tagint jtag = tag[j]; if (!which) { int offset=ii; for (int k=0; k