/*************************************************************************** lj_gpu_kernel.cu ------------------- W. Michael Brown Routines that actually perform the force computation __________________________________________________________________________ This file is part of the LAMMPS GPU Library __________________________________________________________________________ begin : Tue Aug 4 2009 copyright : (C) 2009 by W. Michael Brown email : wmbrown@sandia.gov ***************************************************************************/ /* ----------------------------------------------------------------------- Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. ----------------------------------------------------------------------- */ #ifndef LJ_GPU_KERNEL #define LJ_GPU_KERNEL template __global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor, const int *dev_ij, const int nbor_pitch, acctyp *ans, size_t ans_pitch, const bool eflag, const bool vflag, const int inum, const int nall) { __shared__ numtyp sp_lj[4]; // ii indexes the two interacting particles in gi int ii=threadIdx.x; if (ii<4) sp_lj[ii]=special_lj[ii]; ii+=INT_MUL(blockIdx.x,blockDim.x); __syncthreads(); if (ii(i,0); numtyp iy=_x_(i,1); numtyp iz=_x_(i,2); int itype=_x_(i,3); numtyp factor_lj; for ( ; list(j,3); // Compute r12 numtyp delx = ix-_x_(j,0); numtyp dely = iy-_x_(j,1); numtyp delz = iz-_x_(j,2); numtyp r2inv = delx*delx+dely*dely+delz*delz; if (r2inv<_cutsq_(itype,jtype)) { r2inv=(numtyp)1.0/r2inv; numtyp r6inv =r2inv*r2inv*r2inv; numtyp force =factor_lj*r2inv*r6inv*(_lj1_(itype,jtype).x*r6inv- _lj1_(itype,jtype).y); fx+=delx*force; fy+=dely*force; fz+=delz*force; if (eflag) { numtyp e=r6inv*(_lj3_(itype,jtype).x*r6inv- _lj3_(itype,jtype).y); energy+=factor_lj*(e-_offset_(1,1)); } if (vflag) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor // Store answers acctyp *ap1=ans+ii; if (eflag) { *ap1=energy; ap1+=ans_pitch; } if (vflag) { for (int i=0; i<6; i++) { *ap1=virial[i]; ap1+=ans_pitch; } } *ap1=fx; ap1+=ans_pitch; *ap1=fy; ap1+=ans_pitch; *ap1=fz; } // if ii } template __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor, const int *dev_ij, const int nbor_pitch, acctyp *ans, size_t ans_pitch,const bool eflag, const bool vflag, const int inum, const int nall) { // ii indexes the two interacting particles in gi int ii=threadIdx.x; __shared__ numtyp sp_lj[4]; __shared__ numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __shared__ numtyp lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __shared__ numtyp lj2[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __shared__ numtyp lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __shared__ numtyp lj4[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __shared__ numtyp offset[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; if (ii<4) sp_lj[ii]=special_lj[ii]; if (ii(itype,jtype); lj1[ii]=_lj1_(itype,jtype).x; lj2[ii]=_lj1_(itype,jtype).y; if (eflag) { lj3[ii]=_lj3_(itype,jtype).x; lj4[ii]=_lj3_(itype,jtype).y; offset[ii]=_offset_(itype,jtype); } } ii+=INT_MUL(blockIdx.x,blockDim.x); __syncthreads(); if (ii(i,0); numtyp iy=_x_(i,1); numtyp iz=_x_(i,2); int itype=INT_MUL(MAX_SHARED_TYPES,_x_(i,3)); numtyp factor_lj; for ( ; list(j,3); // Compute r12 numtyp delx = ix-_x_(j,0); numtyp dely = iy-_x_(j,1); numtyp delz = iz-_x_(j,2); numtyp r2inv = delx*delx+dely*dely+delz*delz; if (r2inv