/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Mike Brown (ORNL), brownw@ornl.gov ------------------------------------------------------------------------- */ #ifndef GB_GPU_KERNEL #define GB_GPU_KERNEL #ifdef NV_KERNEL #include "gb_gpu_extra.h" #endif __inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape, numtyp ans[9]) { numtyp den = m[3]*m[2]*m[7]-m[0]*m[5]*m[7]- m[2]*m[6]*m[4]+m[1]*m[6]*m[5]- m[3]*m[1]*m[8]+m[0]*m[4]*m[8]; den = (numtyp)1.0/den; ans[0] = shape.x*(m[5]*m[1]*m2[2]+(numtyp)2.0*m[4]*m[8]*m2[0]- m[4]*m2[2]*m[2]-(numtyp)2.0*m[5]*m2[0]*m[7]+ m2[1]*m[2]*m[7]-m2[1]*m[1]*m[8]- m[3]*m[8]*m2[1]+m[6]*m[5]*m2[1]+ m[3]*m2[2]*m[7]-m2[2]*m[6]*m[4])*den; ans[1] = shape.x*(m[2]*m2[0]*m[7]-m[8]*m2[0]*m[1]+ (numtyp)2.0*m[0]*m[8]*m2[1]-m[0]*m2[2]*m[5]- (numtyp)2.0*m[6]*m[2]*m2[1]+m2[2]*m[3]*m[2]- m[8]*m[3]*m2[0]+m[6]*m2[0]*m[5]+ m[6]*m2[2]*m[1]-m2[2]*m[0]*m[7])*den; ans[2] = shape.x*(m[1]*m[5]*m2[0]-m[2]*m2[0]*m[4]- m[0]*m[5]*m2[1]+m[3]*m[2]*m2[1]- m2[1]*m[0]*m[7]-m[6]*m[4]*m2[0]+ (numtyp)2.0*m[4]*m[0]*m2[2]-(numtyp)2.0*m[3]*m2[2]*m[1]+ m[3]*m[7]*m2[0]+m[6]*m2[1]*m[1])*den; ans[3] = shape.y*(-m[4]*m2[5]*m[2]+(numtyp)2.0*m[4]*m[8]*m2[3]+ m[5]*m[1]*m2[5]-(numtyp)2.0*m[5]*m2[3]*m[7]+ m2[4]*m[2]*m[7]-m2[4]*m[1]*m[8]- m[3]*m[8]*m2[4]+m[6]*m[5]*m2[4]- m2[5]*m[6]*m[4]+m[3]*m2[5]*m[7])*den; ans[4] = shape.y*(m[2]*m2[3]*m[7]-m[1]*m[8]*m2[3]+ (numtyp)2.0*m[8]*m[0]*m2[4]-m2[5]*m[0]*m[5]- (numtyp)2.0*m[6]*m2[4]*m[2]-m[3]*m[8]*m2[3]+ m[6]*m[5]*m2[3]+m[3]*m2[5]*m[2]- m[0]*m2[5]*m[7]+m2[5]*m[1]*m[6])*den; ans[5] = shape.y*(m[1]*m[5]*m2[3]-m[2]*m2[3]*m[4]- m[0]*m[5]*m2[4]+m[3]*m[2]*m2[4]+ (numtyp)2.0*m[4]*m[0]*m2[5]-m[0]*m2[4]*m[7]+ m[1]*m[6]*m2[4]-m2[3]*m[6]*m[4]- (numtyp)2.0*m[3]*m[1]*m2[5]+m[3]*m2[3]*m[7])*den; ans[6] = shape.z*(-m[4]*m[2]*m2[8]+m[1]*m[5]*m2[8]+ (numtyp)2.0*m[4]*m2[6]*m[8]-m[1]*m2[7]*m[8]+ m[2]*m[7]*m2[7]-(numtyp)2.0*m2[6]*m[7]*m[5]- m[3]*m2[7]*m[8]+m[5]*m[6]*m2[7]- m[4]*m[6]*m2[8]+m[7]*m[3]*m2[8])*den; ans[7] = shape.z*-(m[1]*m[8]*m2[6]-m[2]*m2[6]*m[7]- (numtyp)2.0*m2[7]*m[0]*m[8]+m[5]*m2[8]*m[0]+ (numtyp)2.0*m2[7]*m[2]*m[6]+m[3]*m2[6]*m[8]- m[3]*m[2]*m2[8]-m[5]*m[6]*m2[6]+ m[0]*m2[8]*m[7]-m2[8]*m[1]*m[6])*den; ans[8] = shape.z*(m[1]*m[5]*m2[6]-m[2]*m2[6]*m[4]- m[0]*m[5]*m2[7]+m[3]*m[2]*m2[7]- m[4]*m[6]*m2[6]-m[7]*m2[7]*m[0]+ (numtyp)2.0*m[4]*m2[8]*m[0]+m[7]*m[3]*m2[6]+ m[6]*m[1]*m2[7]-(numtyp)2.0*m2[8]*m[3]*m[1])*den; } __kernel void kernel_gayberne(__global numtyp4* x_,__global numtyp4 *q, __global numtyp4* shape, __global numtyp4* well, __global numtyp *gum, __global numtyp2* sig_eps, const int ntypes, __global numtyp *lshape, __global int *dev_nbor, const int stride, __global acctyp4 *ans, const int astride, __global acctyp *engv, __global int *err_flag, const int eflag, const int vflag, const int inum, const int nall) { __local numtyp sp_lj[4]; // ii indexes the two interacting particles in gi int ii=THREAD_ID_X; if (ii<4) sp_lj[ii]=gum[ii+3]; ii+=mul24((int)BLOCK_ID_X,(int)BLOCK_SIZE_X); __syncthreads(); if (ii0) energy+=u_r*temp2; numtyp temp1 = -eta*u_r*factor_lj; if (vflag>0) { r12[0]*=-r; r12[1]*=-r; r12[2]*=-r; numtyp ft=temp1*dchi[0]-temp2*dUr[0]; f.x+=ft; virial[0]+=r12[0]*ft; ft=temp1*dchi[1]-temp2*dUr[1]; f.y+=ft; virial[1]+=r12[1]*ft; virial[3]+=r12[0]*ft; ft=temp1*dchi[2]-temp2*dUr[2]; f.z+=ft; virial[2]+=r12[2]*ft; virial[4]+=r12[0]*ft; virial[5]+=r12[1]*ft; } else { f.x+=temp1*dchi[0]-temp2*dUr[0]; f.y+=temp1*dchi[1]-temp2*dUr[1]; f.z+=temp1*dchi[2]-temp2*dUr[2]; } // Torque on 1 temp1 = -u_r*eta*factor_lj; temp2 = -u_r*chi*factor_lj; numtyp temp3 = -chi*eta*factor_lj; tor.x+=temp1*tchi[0]+temp2*teta[0]+temp3*tUr[0]; tor.y+=temp1*tchi[1]+temp2*teta[1]+temp3*tUr[1]; tor.z+=temp1*tchi[2]+temp2*teta[2]+temp3*tUr[2]; } // for nbor // Store answers __global acctyp *ap1=engv+ii; if (eflag>0) { *ap1=energy; ap1+=astride; } if (vflag>0) { for (int i=0; i<6; i++) { *ap1=virial[i]; ap1+=astride; } } ans[ii]=f; ans[ii+astride]=tor; } // if ii } #endif