diff --git a/src/OPT/pair_eam_alloy_opt.cpp b/src/OPT/pair_eam_alloy_opt.cpp index 8bf4acfe04..0deaf8b430 100644 --- a/src/OPT/pair_eam_alloy_opt.cpp +++ b/src/OPT/pair_eam_alloy_opt.cpp @@ -11,6 +11,14 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing authors: + James Fischer, High Performance Technologies, Inc. + Charles Cornwell, High Performance Technologies, Inc. + David Richie, Stone Ridge Technology + Vincent Natoli, Stone Ridge Technology +------------------------------------------------------------------------- */ + #include "pair_eam_alloy_opt.h" using namespace LAMMPS_NS; @@ -18,6 +26,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- multiple inheritance from two parent classes invoke constructor of grandparent class, then of each parent + inherit optimized compute() from PairEAMOpt + inherit everything else from PairEAMAlloy ------------------------------------------------------------------------- */ PairEAMAlloyOpt::PairEAMAlloyOpt(LAMMPS *lmp) : diff --git a/src/OPT/pair_eam_alloy_opt.h b/src/OPT/pair_eam_alloy_opt.h index 507fa44376..5435f00fa4 100644 --- a/src/OPT/pair_eam_alloy_opt.h +++ b/src/OPT/pair_eam_alloy_opt.h @@ -25,10 +25,6 @@ PairStyle(eam/alloy/opt,PairEAMAlloyOpt) namespace LAMMPS_NS { -// multiple inheritance from two parent classes -// optimized compute() from PairEAMOpt -// everything else from PairEAMAlloy - class PairEAMAlloyOpt : public PairEAMAlloy, public PairEAMOpt { public: PairEAMAlloyOpt(class LAMMPS *); diff --git a/src/OPT/pair_eam_fs_opt.cpp b/src/OPT/pair_eam_fs_opt.cpp index 11e8b222d6..a097eec2b6 100644 --- a/src/OPT/pair_eam_fs_opt.cpp +++ b/src/OPT/pair_eam_fs_opt.cpp @@ -11,6 +11,14 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing authors: + James Fischer, High Performance Technologies, Inc. + Charles Cornwell, High Performance Technologies, Inc. + David Richie, Stone Ridge Technology + Vincent Natoli, Stone Ridge Technology +------------------------------------------------------------------------- */ + #include "pair_eam_fs_opt.h" using namespace LAMMPS_NS; @@ -18,6 +26,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- multiple inheritance from two parent classes invoke constructor of grandparent class, then of each parent + inherit optimized compute() from PairEAMOpt + inherit everything else from PairEAMFS ------------------------------------------------------------------------- */ PairEAMFSOpt::PairEAMFSOpt(LAMMPS *lmp) : diff --git a/src/OPT/pair_eam_fs_opt.h b/src/OPT/pair_eam_fs_opt.h index 519630880a..b5bfdad4bf 100644 --- a/src/OPT/pair_eam_fs_opt.h +++ b/src/OPT/pair_eam_fs_opt.h @@ -25,10 +25,6 @@ PairStyle(eam/fs/opt,PairEAMFSOpt) namespace LAMMPS_NS { -// multiple inheritance from two parent classes -// optimized compute() from PairEAMOpt -// everything else from PairEAMFS - class PairEAMFSOpt : public PairEAMFS, public PairEAMOpt { public: PairEAMFSOpt(class LAMMPS *); @@ -37,5 +33,4 @@ class PairEAMFSOpt : public PairEAMFS, public PairEAMOpt { } #endif - #endif diff --git a/src/OPT/pair_eam_opt.cpp b/src/OPT/pair_eam_opt.cpp index 43c2d3733e..83a9580b92 100644 --- a/src/OPT/pair_eam_opt.cpp +++ b/src/OPT/pair_eam_opt.cpp @@ -19,10 +19,20 @@ Vincent Natoli, Stone Ridge Technology ------------------------------------------------------------------------- */ +#include "math.h" +#include "stdlib.h" #include "pair_eam_opt.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neigh_list.h" +#include "memory.h" using namespace LAMMPS_NS; +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + /* ---------------------------------------------------------------------- */ PairEAMOpt::PairEAMOpt(LAMMPS *lmp) : PairEAM(lmp) {} @@ -47,3 +57,295 @@ void PairEAMOpt::compute(int eflag, int vflag) else return eval<0,0,0>(); } } + +/* ---------------------------------------------------------------------- */ + +template < int EVFLAG, int EFLAG, int NEWTON_PAIR > +void PairEAMOpt::eval() +{ + typedef struct { double x,y,z; } vec3_t; + + typedef struct { + double rhor0i,rhor1i,rhor2i,rhor3i; + double rhor0j,rhor1j,rhor2j,rhor3j; + } fast_alpha_t; + + typedef struct { + double frho0,frho1,frho2,frho3,frho4,frho5,frho6; + double _pad[1]; + } fast_beta_t; + + typedef struct { + double rhor4i,rhor5i,rhor6i; + double rhor4j,rhor5j,rhor6j; + double z2r0,z2r1,z2r2,z2r3,z2r4,z2r5,z2r6; + double _pad[3]; + } fast_gamma_t; + + int i,j,ii,jj,inum,jnum,itype,jtype; + double evdwl = 0.0; + double* __restrict__ coeff; + + // grow energy array if necessary + + if (atom->nmax > nmax) { + memory->sfree(rho); + memory->sfree(fp); + nmax = atom->nmax; + rho = (double *) memory->smalloc(nmax*sizeof(double),"pair:rho"); + fp = (double *) memory->smalloc(nmax*sizeof(double),"pair:fp"); + } + + double** __restrict__ x = atom->x; + double** __restrict__ f = atom->f; + int* __restrict__ type = atom->type; + int nlocal = atom->nlocal; + + vec3_t* __restrict__ xx = (vec3_t*)x[0]; + vec3_t* __restrict__ ff = (vec3_t*)f[0]; + + double tmp_cutforcesq = cutforcesq; + double tmp_rdr = rdr; + int nr2 = nr-2; + int nr1 = nr-1; + + inum = list->inum; + int* __restrict__ ilist = list->ilist; + int** __restrict__ firstneigh = list->firstneigh; + int* __restrict__ numneigh = list->numneigh; + + int ntypes = atom->ntypes; + int ntypes2 = ntypes*ntypes; + + fast_alpha_t* __restrict__ fast_alpha = + (fast_alpha_t*) malloc(ntypes2*(nr+1)*sizeof(fast_alpha_t)); + for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { + fast_alpha_t* __restrict__ tab = &fast_alpha[i*ntypes*nr+j*nr]; + if (type2rhor[i+1][j+1] >= 0) { + for(int m = 1; m <= nr; m++) { + tab[m].rhor0i = rhor_spline[type2rhor[i+1][j+1]][m][6]; + tab[m].rhor1i = rhor_spline[type2rhor[i+1][j+1]][m][5]; + tab[m].rhor2i = rhor_spline[type2rhor[i+1][j+1]][m][4]; + tab[m].rhor3i = rhor_spline[type2rhor[i+1][j+1]][m][3]; + } + } + if (type2rhor[j+1][i+1] >= 0) { + for(int m = 1; m <= nr; m++) { + tab[m].rhor0j = rhor_spline[type2rhor[j+1][i+1]][m][6]; + tab[m].rhor1j = rhor_spline[type2rhor[j+1][i+1]][m][5]; + tab[m].rhor2j = rhor_spline[type2rhor[j+1][i+1]][m][4]; + tab[m].rhor3j = rhor_spline[type2rhor[j+1][i+1]][m][3]; + } + } + } + fast_alpha_t* __restrict__ tabeight = fast_alpha; + + fast_gamma_t* __restrict__ fast_gamma = + (fast_gamma_t*) malloc(ntypes2*(nr+1)*sizeof(fast_gamma_t)); + for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { + fast_gamma_t* __restrict__ tab = &fast_gamma[i*ntypes*nr+j*nr]; + if (type2rhor[i+1][j+1] >= 0) { + for(int m = 1; m <= nr; m++) { + tab[m].rhor4i = rhor_spline[type2rhor[i+1][j+1]][m][2]; + tab[m].rhor5i = rhor_spline[type2rhor[i+1][j+1]][m][1]; + tab[m].rhor6i = rhor_spline[type2rhor[i+1][j+1]][m][0]; + } + } + if (type2rhor[j+1][i+1] >= 0) { + for(int m = 1; m <= nr; m++) { + tab[m].rhor4j = rhor_spline[type2rhor[j+1][i+1]][m][2]; + tab[m].rhor5j = rhor_spline[type2rhor[j+1][i+1]][m][1]; + tab[m].rhor6j = rhor_spline[type2rhor[j+1][i+1]][m][0]; + tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0]; + } + } + if (type2z2r[i+1][j+1] >= 0) { + for(int m = 1; m <= nr; m++) { + tab[m].z2r0 = z2r_spline[type2z2r[i+1][j+1]][m][6]; + tab[m].z2r1 = z2r_spline[type2z2r[i+1][j+1]][m][5]; + tab[m].z2r2 = z2r_spline[type2z2r[i+1][j+1]][m][4]; + tab[m].z2r3 = z2r_spline[type2z2r[i+1][j+1]][m][3]; + tab[m].z2r4 = z2r_spline[type2z2r[i+1][j+1]][m][2]; + tab[m].z2r5 = z2r_spline[type2z2r[i+1][j+1]][m][1]; + tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0]; + } + } + } + fast_gamma_t* __restrict__ tabss = fast_gamma; + + // zero out density + + if (NEWTON_PAIR) { + int m = nlocal + atom->nghost; + for (i = 0; i < m; i++) rho[i] = 0.0; + } else for (i = 0; i < nlocal; i++) rho[i] = 0.0; + + // rho = density at each atom + // loop over neighbors of my atoms + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double xtmp = xx[i].x; + double ytmp = xx[i].y; + double ztmp = xx[i].z; + itype = type[i] - 1; + int* __restrict__ jlist = firstneigh[i]; + jnum = numneigh[i]; + + double tmprho = rho[i]; + fast_alpha_t* __restrict__ tabeighti = &tabeight[itype*ntypes*nr]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < tmp_cutforcesq) { + jtype = type[j] - 1; + + double p = sqrt(rsq)*tmp_rdr; + if ( (int)p <= nr2 ) { + int m = (int)p + 1; + p -= (double)((int)p); + fast_alpha_t& a = tabeighti[jtype*nr+m]; + tmprho += ((a.rhor3j*p+a.rhor2j)*p+a.rhor1j)*p+a.rhor0j; + if (NEWTON_PAIR || j < nlocal) { + rho[j] += ((a.rhor3i*p+a.rhor2i)*p+a.rhor1i)*p+a.rhor0i; + } + } else { + fast_alpha_t& a = tabeighti[jtype*nr+nr1]; + tmprho += a.rhor3j+a.rhor2j+a.rhor1j+a.rhor0j; + if (NEWTON_PAIR || j < nlocal) { + rho[j] += a.rhor3i+a.rhor2i+a.rhor1i+a.rhor0i; + } + } + } + } + rho[i] = tmprho; + } + + // communicate and sum densities + + if (NEWTON_PAIR) comm->reverse_comm_pair(this); + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double p = rho[i]*rdrho; + int m = MIN((int)p,nrho-2); + p -= (double)m; + ++m; + coeff = frho_spline[type2frho[type[i]]][m]; + fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; + if (EFLAG) { + double phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (eflag_global) eng_vdwl += phi; + if (eflag_atom) eatom[i] += phi; + } + } + + // communicate derivative of embedding function + + comm->forward_comm_pair(this); + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double xtmp = xx[i].x; + double ytmp = xx[i].y; + double ztmp = xx[i].z; + int itype1 = type[i] - 1; + int* __restrict__ jlist = firstneigh[i]; + jnum = numneigh[i]; + + double tmpfx = 0.0; + double tmpfy = 0.0; + double tmpfz = 0.0; + + fast_gamma_t* __restrict__ tabssi = &tabss[itype1*ntypes*nr]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < tmp_cutforcesq) { + jtype = type[j] - 1; + double r = sqrt(rsq); + double rhoip,rhojp,z2,z2p; + double p = r*tmp_rdr; + if ( (int)p <= nr2 ) { + int m = (int) p + 1; + p -= (double)((int) p); + + fast_gamma_t& a = tabssi[jtype*nr+m]; + rhoip = (a.rhor6i*p + a.rhor5i)*p + a.rhor4i; + rhojp = (a.rhor6j*p + a.rhor5j)*p + a.rhor4j; + z2 = ((a.z2r3*p + a.z2r2)*p + a.z2r1)*p + a.z2r0; + z2p = (a.z2r6*p + a.z2r5)*p + a.z2r4; + + } else { + + fast_gamma_t& a = tabssi[jtype*nr+nr1]; + rhoip = a.rhor6i + a.rhor5i + a.rhor4i; + rhojp = a.rhor6j + a.rhor5j + a.rhor4j; + z2 = a.z2r3 + a.z2r2 + a.z2r1 + a.z2r0; + z2p = a.z2r6 + a.z2r5 + a.z2r4; + } + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + double recip = 1.0/r; + double phi = z2*recip; + double phip = z2p*recip - phi*recip; + double psip = fp[i]*rhojp + fp[j]*rhoip + phip; + double fpair = -psip*recip; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) evdwl = phi; + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz); + } + } + + ff[i].x += tmpfx; + ff[i].y += tmpfy; + ff[i].z += tmpfz; + } + + free(fast_alpha); fast_alpha = 0; + free(fast_gamma); fast_gamma = 0; + + if (vflag_fdotr) virial_fdotr_compute(); +} diff --git a/src/OPT/pair_eam_opt.h b/src/OPT/pair_eam_opt.h index c2d302e882..afc7764da9 100644 --- a/src/OPT/pair_eam_opt.h +++ b/src/OPT/pair_eam_opt.h @@ -11,14 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing authors: - James Fischer, High Performance Technologies, Inc. - Charles Cornwell, High Performance Technologies, Inc. - David Richie, Stone Ridge Technology - Vincent Natol, Stone Ridge Technology -------------------------------------------------------------------------- */ - #ifdef PAIR_CLASS PairStyle(eam/opt,PairEAMOpt) @@ -28,17 +20,7 @@ PairStyle(eam/opt,PairEAMOpt) #ifndef LMP_PAIR_EAM_OPT_H #define LMP_PAIR_EAM_OPT_H -#include "math.h" -#include "stdlib.h" #include "pair_eam.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neigh_list.h" -#include "memory.h" - -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#define MAX(a,b) ((a) > (b) ? (a) : (b)) namespace LAMMPS_NS { @@ -54,296 +36,6 @@ class PairEAMOpt : virtual public PairEAM { template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval(); }; -template < int EVFLAG, int EFLAG, int NEWTON_PAIR > -void PairEAMOpt::eval() -{ - typedef struct { double x,y,z; } vec3_t; - - typedef struct { - double rhor0i,rhor1i,rhor2i,rhor3i; - double rhor0j,rhor1j,rhor2j,rhor3j; - } fast_alpha_t; - - typedef struct { - double frho0,frho1,frho2,frho3,frho4,frho5,frho6; - double _pad[1]; - } fast_beta_t; - - typedef struct { - double rhor4i,rhor5i,rhor6i; - double rhor4j,rhor5j,rhor6j; - double z2r0,z2r1,z2r2,z2r3,z2r4,z2r5,z2r6; - double _pad[3]; - } fast_gamma_t; - - int i,j,ii,jj,inum,jnum,itype,jtype; - double evdwl = 0.0; - double* __restrict__ coeff; - - // grow energy array if necessary - - if (atom->nmax > nmax) { - memory->sfree(rho); - memory->sfree(fp); - nmax = atom->nmax; - rho = (double *) memory->smalloc(nmax*sizeof(double),"pair:rho"); - fp = (double *) memory->smalloc(nmax*sizeof(double),"pair:fp"); - } - - double** __restrict__ x = atom->x; - double** __restrict__ f = atom->f; - int* __restrict__ type = atom->type; - int nlocal = atom->nlocal; - - vec3_t* __restrict__ xx = (vec3_t*)x[0]; - vec3_t* __restrict__ ff = (vec3_t*)f[0]; - - double tmp_cutforcesq = cutforcesq; - double tmp_rdr = rdr; - int nr2 = nr-2; - int nr1 = nr-1; - - inum = list->inum; - int* __restrict__ ilist = list->ilist; - int** __restrict__ firstneigh = list->firstneigh; - int* __restrict__ numneigh = list->numneigh; - - int ntypes = atom->ntypes; - int ntypes2 = ntypes*ntypes; - - fast_alpha_t* __restrict__ fast_alpha = - (fast_alpha_t*) malloc(ntypes2*(nr+1)*sizeof(fast_alpha_t)); - for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { - fast_alpha_t* __restrict__ tab = &fast_alpha[i*ntypes*nr+j*nr]; - if (type2rhor[i+1][j+1] >= 0) { - for(int m = 1; m <= nr; m++) { - tab[m].rhor0i = rhor_spline[type2rhor[i+1][j+1]][m][6]; - tab[m].rhor1i = rhor_spline[type2rhor[i+1][j+1]][m][5]; - tab[m].rhor2i = rhor_spline[type2rhor[i+1][j+1]][m][4]; - tab[m].rhor3i = rhor_spline[type2rhor[i+1][j+1]][m][3]; - } - } - if (type2rhor[j+1][i+1] >= 0) { - for(int m = 1; m <= nr; m++) { - tab[m].rhor0j = rhor_spline[type2rhor[j+1][i+1]][m][6]; - tab[m].rhor1j = rhor_spline[type2rhor[j+1][i+1]][m][5]; - tab[m].rhor2j = rhor_spline[type2rhor[j+1][i+1]][m][4]; - tab[m].rhor3j = rhor_spline[type2rhor[j+1][i+1]][m][3]; - } - } - } - fast_alpha_t* __restrict__ tabeight = fast_alpha; - - fast_gamma_t* __restrict__ fast_gamma = - (fast_gamma_t*) malloc(ntypes2*(nr+1)*sizeof(fast_gamma_t)); - for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { - fast_gamma_t* __restrict__ tab = &fast_gamma[i*ntypes*nr+j*nr]; - if (type2rhor[i+1][j+1] >= 0) { - for(int m = 1; m <= nr; m++) { - tab[m].rhor4i = rhor_spline[type2rhor[i+1][j+1]][m][2]; - tab[m].rhor5i = rhor_spline[type2rhor[i+1][j+1]][m][1]; - tab[m].rhor6i = rhor_spline[type2rhor[i+1][j+1]][m][0]; - } - } - if (type2rhor[j+1][i+1] >= 0) { - for(int m = 1; m <= nr; m++) { - tab[m].rhor4j = rhor_spline[type2rhor[j+1][i+1]][m][2]; - tab[m].rhor5j = rhor_spline[type2rhor[j+1][i+1]][m][1]; - tab[m].rhor6j = rhor_spline[type2rhor[j+1][i+1]][m][0]; - tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0]; - } - } - if (type2z2r[i+1][j+1] >= 0) { - for(int m = 1; m <= nr; m++) { - tab[m].z2r0 = z2r_spline[type2z2r[i+1][j+1]][m][6]; - tab[m].z2r1 = z2r_spline[type2z2r[i+1][j+1]][m][5]; - tab[m].z2r2 = z2r_spline[type2z2r[i+1][j+1]][m][4]; - tab[m].z2r3 = z2r_spline[type2z2r[i+1][j+1]][m][3]; - tab[m].z2r4 = z2r_spline[type2z2r[i+1][j+1]][m][2]; - tab[m].z2r5 = z2r_spline[type2z2r[i+1][j+1]][m][1]; - tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0]; - } - } - } - fast_gamma_t* __restrict__ tabss = fast_gamma; - - // zero out density - - if (NEWTON_PAIR) { - int m = nlocal + atom->nghost; - for (i = 0; i < m; i++) rho[i] = 0.0; - } else for (i = 0; i < nlocal; i++) rho[i] = 0.0; - - // rho = density at each atom - // loop over neighbors of my atoms - - // loop over neighbors of my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double xtmp = xx[i].x; - double ytmp = xx[i].y; - double ztmp = xx[i].z; - itype = type[i] - 1; - int* __restrict__ jlist = firstneigh[i]; - jnum = numneigh[i]; - - double tmprho = rho[i]; - fast_alpha_t* __restrict__ tabeighti = &tabeight[itype*ntypes*nr]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= NEIGHMASK; - - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - if (rsq < tmp_cutforcesq) { - jtype = type[j] - 1; - - double p = sqrt(rsq)*tmp_rdr; - if ( (int)p <= nr2 ) { - int m = (int)p + 1; - p -= (double)((int)p); - fast_alpha_t& a = tabeighti[jtype*nr+m]; - tmprho += ((a.rhor3j*p+a.rhor2j)*p+a.rhor1j)*p+a.rhor0j; - if (NEWTON_PAIR || j < nlocal) { - rho[j] += ((a.rhor3i*p+a.rhor2i)*p+a.rhor1i)*p+a.rhor0i; - } - } else { - fast_alpha_t& a = tabeighti[jtype*nr+nr1]; - tmprho += a.rhor3j+a.rhor2j+a.rhor1j+a.rhor0j; - if (NEWTON_PAIR || j < nlocal) { - rho[j] += a.rhor3i+a.rhor2i+a.rhor1i+a.rhor0i; - } - } - } - } - rho[i] = tmprho; - } - - // communicate and sum densities - - if (NEWTON_PAIR) comm->reverse_comm_pair(this); - - // fp = derivative of embedding energy at each atom - // phi = embedding energy at each atom - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double p = rho[i]*rdrho; - int m = MIN((int)p,nrho-2); - p -= (double)m; - ++m; - coeff = frho_spline[type2frho[type[i]]][m]; - fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; - if (EFLAG) { - double phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; - if (eflag_global) eng_vdwl += phi; - if (eflag_atom) eatom[i] += phi; - } - } - - // communicate derivative of embedding function - - comm->forward_comm_pair(this); - - // compute forces on each atom - // loop over neighbors of my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double xtmp = xx[i].x; - double ytmp = xx[i].y; - double ztmp = xx[i].z; - int itype1 = type[i] - 1; - int* __restrict__ jlist = firstneigh[i]; - jnum = numneigh[i]; - - double tmpfx = 0.0; - double tmpfy = 0.0; - double tmpfz = 0.0; - - fast_gamma_t* __restrict__ tabssi = &tabss[itype1*ntypes*nr]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= NEIGHMASK; - - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - if (rsq < tmp_cutforcesq) { - jtype = type[j] - 1; - double r = sqrt(rsq); - double rhoip,rhojp,z2,z2p; - double p = r*tmp_rdr; - if ( (int)p <= nr2 ) { - int m = (int) p + 1; - p -= (double)((int) p); - - fast_gamma_t& a = tabssi[jtype*nr+m]; - rhoip = (a.rhor6i*p + a.rhor5i)*p + a.rhor4i; - rhojp = (a.rhor6j*p + a.rhor5j)*p + a.rhor4j; - z2 = ((a.z2r3*p + a.z2r2)*p + a.z2r1)*p + a.z2r0; - z2p = (a.z2r6*p + a.z2r5)*p + a.z2r4; - - } else { - - fast_gamma_t& a = tabssi[jtype*nr+nr1]; - rhoip = a.rhor6i + a.rhor5i + a.rhor4i; - rhojp = a.rhor6j + a.rhor5j + a.rhor4j; - z2 = a.z2r3 + a.z2r2 + a.z2r1 + a.z2r0; - z2p = a.z2r6 + a.z2r5 + a.z2r4; - } - - // rhoip = derivative of (density at atom j due to atom i) - // rhojp = derivative of (density at atom i due to atom j) - // phi = pair potential energy - // phip = phi' - // z2 = phi * r - // z2p = (phi * r)' = (phi' r) + phi - // psip needs both fp[i] and fp[j] terms since r_ij appears in two - // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) - // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip - - double recip = 1.0/r; - double phi = z2*recip; - double phip = z2p*recip - phi*recip; - double psip = fp[i]*rhojp + fp[j]*rhoip + phip; - double fpair = -psip*recip; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) evdwl = phi; - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz); - } - } - - ff[i].x += tmpfx; - ff[i].y += tmpfy; - ff[i].z += tmpfz; - } - - free(fast_alpha); fast_alpha = 0; - free(fast_gamma); fast_gamma = 0; - - if (vflag_fdotr) virial_fdotr_compute(); -} - } #endif diff --git a/src/OPT/pair_lj_charmm_coul_long_opt.cpp b/src/OPT/pair_lj_charmm_coul_long_opt.cpp index 173c201d12..6049eb2a31 100644 --- a/src/OPT/pair_lj_charmm_coul_long_opt.cpp +++ b/src/OPT/pair_lj_charmm_coul_long_opt.cpp @@ -18,10 +18,26 @@ Vincent Natoli, Stone Ridge Technology ------------------------------------------------------------------------- */ +#include "math.h" +#include "stdlib.h" #include "pair_lj_charmm_coul_long_opt.h" +#include "atom.h" +#include "force.h" +#include "neigh_list.h" using namespace LAMMPS_NS; +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define EWALD_A1 0.254829592 +#define EWALD_A2 -0.284496736 +#define EWALD_A3 1.421413741 +#define EWALD_A4 -1.453152027 +#define EWALD_A5 1.061405429 + /* ---------------------------------------------------------------------- */ PairLJCharmmCoulLongOpt::PairLJCharmmCoulLongOpt(LAMMPS *lmp) : @@ -47,3 +63,281 @@ void PairLJCharmmCoulLongOpt::compute(int eflag, int vflag) else return eval<0,0,0>(); } } + +/* ---------------------------------------------------------------------- */ + +template < int EVFLAG, int EFLAG, int NEWTON_PAIR > +void PairLJCharmmCoulLongOpt::eval() +{ + typedef struct { double x,y,z; } vec3_t; + + typedef struct { + double cutsq,lj1,lj2,lj3,lj4,offset; + double _pad[2]; + } fast_alpha_t; + + int i,j,ii,jj,inum,jnum,itype,jtype,itable,sbindex; + double fraction,table; + double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double philj,switch1,switch2; + + double rsq; + + double evdwl = 0.0; + double ecoul = 0.0; + + double** __restrict__ x = atom->x; + double** __restrict__ f = atom->f; + double* __restrict__ q = atom->q; + int* __restrict__ type = atom->type; + int nlocal = atom->nlocal; + double* __restrict__ special_coul = force->special_coul; + double* __restrict__ special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + inum = list->inum; + int* __restrict__ ilist = list->ilist; + int** __restrict__ firstneigh = list->firstneigh; + int* __restrict__ numneigh = list->numneigh; + + vec3_t* __restrict__ xx = (vec3_t*)x[0]; + vec3_t* __restrict__ ff = (vec3_t*)f[0]; + + int ntypes = atom->ntypes; + int ntypes2 = ntypes*ntypes; + + double tmp_coef1 = 1.0/denom_lj; + double tmp_coef2 = cut_ljsq - 3.0*cut_lj_innersq; + + fast_alpha_t* __restrict__ fast_alpha = + (fast_alpha_t*)malloc(ntypes2*sizeof(fast_alpha_t)); + for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { + fast_alpha_t& a = fast_alpha[i*ntypes+j]; + a.cutsq = cutsq[i+1][j+1]; + a.lj1 = lj1[i+1][j+1]; + a.lj2 = lj2[i+1][j+1]; + a.lj3 = lj3[i+1][j+1]; + a.lj4 = lj4[i+1][j+1]; + } + fast_alpha_t* __restrict__ tabsix = fast_alpha; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double qtmp = q[i]; + double xtmp = xx[i].x; + double ytmp = xx[i].y; + double ztmp = xx[i].z; + itype = type[i] - 1; + int* __restrict__ jlist = firstneigh[i]; + jnum = numneigh[i]; + + double tmpfx = 0.0; + double tmpfy = 0.0; + double tmpfz = 0.0; + + fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + sbindex = sbmask(j); + + if (sbindex == 0) { + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + rsq = delx*delx + dely*dely + delz*delz; + double tmp_coef3 = qtmp*q[j]; + + if (rsq < cut_bothsq) { + r2inv = 1.0/rsq; + + forcecoul = 0.0; + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * + (EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) * + expm2; + prefactor = qqrd2e * tmp_coef3/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = tmp_coef3 * table; + } + } + + forcelj = 0.0; + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j] - 1; + fast_alpha_t& a = tabsixi[jtype]; + forcelj = r6inv * (a.lj1*r6inv - a.lj2); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (tmp_coef2 + 2.0*rsq) * tmp_coef1; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * tmp_coef1; + philj = r6inv * (a.lj3*r6inv - a.lj4); + forcelj = forcelj*switch1 + philj*switch2; + } + } + + double fpair = (forcecoul + forcelj) * r2inv; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = tmp_coef3 * table; + } + } else ecoul = 0.0; + + if (rsq < cut_ljsq) { + fast_alpha_t& a = tabsixi[jtype]; + evdwl = r6inv*(a.lj3*r6inv-a.lj4); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (tmp_coef2 + 2.0*rsq) * tmp_coef1; + evdwl *= switch1; + } + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz); + } + + } else { + factor_lj = special_lj[sbindex]; + factor_coul = special_coul[sbindex]; + j &= NEIGHMASK; + + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + rsq = delx*delx + dely*dely + delz*delz; + double tmp_coef3 = qtmp*q[j]; + + if (rsq < cut_bothsq) { + r2inv = 1.0/rsq; + + forcecoul = 0.0; + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * + (EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) * + expm2; + prefactor = qqrd2e * tmp_coef3/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = tmp_coef3 * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = tmp_coef3 * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } + + forcelj = 0.0; + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j] - 1; + fast_alpha_t& a = tabsixi[jtype]; + forcelj = r6inv * (a.lj1*r6inv - a.lj2); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (tmp_coef2 + 2.0*rsq) * tmp_coef1; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * tmp_coef1; + fast_alpha_t& a = tabsixi[jtype]; + philj = r6inv * (a.lj3*r6inv - a.lj4); + forcelj = forcelj*switch1 + philj*switch2; + } + } + + double fpair = (forcecoul + factor_lj*forcelj) * r2inv; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = tmp_coef3 * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq) { + fast_alpha_t& a = tabsixi[jtype]; + evdwl = r6inv*(a.lj3*r6inv-a.lj4); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (tmp_coef2 + 2.0*rsq) * tmp_coef1; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz); + } + } + } + + ff[i].x += tmpfx; + ff[i].y += tmpfy; + ff[i].z += tmpfz; + } + + free(fast_alpha); fast_alpha = 0; + + if (vflag_fdotr) virial_fdotr_compute(); +} + diff --git a/src/OPT/pair_lj_charmm_coul_long_opt.h b/src/OPT/pair_lj_charmm_coul_long_opt.h index c6616b0c0b..e2cc1af8ed 100644 --- a/src/OPT/pair_lj_charmm_coul_long_opt.h +++ b/src/OPT/pair_lj_charmm_coul_long_opt.h @@ -11,13 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing authors: - James Fischer, High Performance Technologies, Inc. - David Richie, Stone Ridge Technology - Vincent Natol, Stone Ridge Technology -------------------------------------------------------------------------- */ - #ifdef PAIR_CLASS PairStyle(lj/charmm/coul/long/opt,PairLJCharmmCoulLongOpt) @@ -27,26 +20,10 @@ PairStyle(lj/charmm/coul/long/opt,PairLJCharmmCoulLongOpt) #ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_OPT_H #define LMP_PAIR_LJ_CHARMM_COUL_LONG_OPT_H -#include "math.h" -#include "stdlib.h" #include "pair_lj_charmm_coul_long.h" -#include "atom.h" -#include "force.h" -#include "neigh_list.h" namespace LAMMPS_NS { -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define EWALD_A1 0.254829592 -#define EWALD_A2 -0.284496736 -#define EWALD_A3 1.421413741 -#define EWALD_A4 -1.453152027 -#define EWALD_A5 1.061405429 - -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#define MAX(a,b) ((a) > (b) ? (a) : (b)) - class PairLJCharmmCoulLongOpt : public PairLJCharmmCoulLong { public: PairLJCharmmCoulLongOpt(class LAMMPS *); @@ -55,292 +32,6 @@ class PairLJCharmmCoulLongOpt : public PairLJCharmmCoulLong { private: template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval(); }; - -template < int EVFLAG, int EFLAG, int NEWTON_PAIR > -void PairLJCharmmCoulLongOpt::eval() -{ - typedef struct { double x,y,z; } vec3_t; - - typedef struct { - double cutsq,lj1,lj2,lj3,lj4,offset; - double _pad[2]; - } fast_alpha_t; - - int i,j,ii,jj,inum,jnum,itype,jtype,itable,sbindex; - double fraction,table; - double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - double grij,expm2,prefactor,t,erfc; - double philj,switch1,switch2; - - double rsq; - - double evdwl = 0.0; - double ecoul = 0.0; - - double** __restrict__ x = atom->x; - double** __restrict__ f = atom->f; - double* __restrict__ q = atom->q; - int* __restrict__ type = atom->type; - int nlocal = atom->nlocal; - double* __restrict__ special_coul = force->special_coul; - double* __restrict__ special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; - - inum = list->inum; - int* __restrict__ ilist = list->ilist; - int** __restrict__ firstneigh = list->firstneigh; - int* __restrict__ numneigh = list->numneigh; - - vec3_t* __restrict__ xx = (vec3_t*)x[0]; - vec3_t* __restrict__ ff = (vec3_t*)f[0]; - - int ntypes = atom->ntypes; - int ntypes2 = ntypes*ntypes; - - double tmp_coef1 = 1.0/denom_lj; - double tmp_coef2 = cut_ljsq - 3.0*cut_lj_innersq; - - fast_alpha_t* __restrict__ fast_alpha = - (fast_alpha_t*)malloc(ntypes2*sizeof(fast_alpha_t)); - for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { - fast_alpha_t& a = fast_alpha[i*ntypes+j]; - a.cutsq = cutsq[i+1][j+1]; - a.lj1 = lj1[i+1][j+1]; - a.lj2 = lj2[i+1][j+1]; - a.lj3 = lj3[i+1][j+1]; - a.lj4 = lj4[i+1][j+1]; - } - fast_alpha_t* __restrict__ tabsix = fast_alpha; - - // loop over neighbors of my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double qtmp = q[i]; - double xtmp = xx[i].x; - double ytmp = xx[i].y; - double ztmp = xx[i].z; - itype = type[i] - 1; - int* __restrict__ jlist = firstneigh[i]; - jnum = numneigh[i]; - - double tmpfx = 0.0; - double tmpfy = 0.0; - double tmpfz = 0.0; - - fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - sbindex = sbmask(j); - - if (sbindex == 0) { - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - rsq = delx*delx + dely*dely + delz*delz; - double tmp_coef3 = qtmp*q[j]; - - if (rsq < cut_bothsq) { - r2inv = 1.0/rsq; - - forcecoul = 0.0; - if (rsq < cut_coulsq) { - if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrt(rsq); - grij = g_ewald * r; - expm2 = exp(-grij*grij); - t = 1.0 / (1.0 + EWALD_P*grij); - erfc = t * - (EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) * - expm2; - prefactor = qqrd2e * tmp_coef3/r; - forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - } else { - union_int_float_t rsq_lookup; - rsq_lookup.f = rsq; - itable = rsq_lookup.i & ncoulmask; - itable >>= ncoulshiftbits; - fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; - table = ftable[itable] + fraction*dftable[itable]; - forcecoul = tmp_coef3 * table; - } - } - - forcelj = 0.0; - if (rsq < cut_ljsq) { - r6inv = r2inv*r2inv*r2inv; - jtype = type[j] - 1; - fast_alpha_t& a = tabsixi[jtype]; - forcelj = r6inv * (a.lj1*r6inv - a.lj2); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (tmp_coef2 + 2.0*rsq) * tmp_coef1; - switch2 = 12.0*rsq * (cut_ljsq-rsq) * - (rsq-cut_lj_innersq) * tmp_coef1; - philj = r6inv * (a.lj3*r6inv - a.lj4); - forcelj = forcelj*switch1 + philj*switch2; - } - } - - double fpair = (forcecoul + forcelj) * r2inv; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) { - if (rsq < cut_coulsq) { - if (!ncoultablebits || rsq <= tabinnersq) - ecoul = prefactor*erfc; - else { - table = etable[itable] + fraction*detable[itable]; - ecoul = tmp_coef3 * table; - } - } else ecoul = 0.0; - - if (rsq < cut_ljsq) { - fast_alpha_t& a = tabsixi[jtype]; - evdwl = r6inv*(a.lj3*r6inv-a.lj4); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (tmp_coef2 + 2.0*rsq) * tmp_coef1; - evdwl *= switch1; - } - } else evdwl = 0.0; - } - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz); - } - - } else { - factor_lj = special_lj[sbindex]; - factor_coul = special_coul[sbindex]; - j &= NEIGHMASK; - - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - rsq = delx*delx + dely*dely + delz*delz; - double tmp_coef3 = qtmp*q[j]; - - if (rsq < cut_bothsq) { - r2inv = 1.0/rsq; - - forcecoul = 0.0; - if (rsq < cut_coulsq) { - if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrt(rsq); - grij = g_ewald * r; - expm2 = exp(-grij*grij); - t = 1.0 / (1.0 + EWALD_P*grij); - erfc = t * - (EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) * - expm2; - prefactor = qqrd2e * tmp_coef3/r; - forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - if (factor_coul < 1.0) { - forcecoul -= (1.0-factor_coul)*prefactor; - } - } else { - union_int_float_t rsq_lookup; - rsq_lookup.f = rsq; - itable = rsq_lookup.i & ncoulmask; - itable >>= ncoulshiftbits; - fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; - table = ftable[itable] + fraction*dftable[itable]; - forcecoul = tmp_coef3 * table; - if (factor_coul < 1.0) { - table = ctable[itable] + fraction*dctable[itable]; - prefactor = tmp_coef3 * table; - forcecoul -= (1.0-factor_coul)*prefactor; - } - } - } - - forcelj = 0.0; - if (rsq < cut_ljsq) { - r6inv = r2inv*r2inv*r2inv; - jtype = type[j] - 1; - fast_alpha_t& a = tabsixi[jtype]; - forcelj = r6inv * (a.lj1*r6inv - a.lj2); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (tmp_coef2 + 2.0*rsq) * tmp_coef1; - switch2 = 12.0*rsq * (cut_ljsq-rsq) * - (rsq-cut_lj_innersq) * tmp_coef1; - fast_alpha_t& a = tabsixi[jtype]; - philj = r6inv * (a.lj3*r6inv - a.lj4); - forcelj = forcelj*switch1 + philj*switch2; - } - } - - double fpair = (forcecoul + factor_lj*forcelj) * r2inv; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) { - if (rsq < cut_coulsq) { - if (!ncoultablebits || rsq <= tabinnersq) - ecoul = prefactor*erfc; - else { - table = etable[itable] + fraction*detable[itable]; - ecoul = tmp_coef3 * table; - } - if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; - } else ecoul = 0.0; - - if (rsq < cut_ljsq) { - fast_alpha_t& a = tabsixi[jtype]; - evdwl = r6inv*(a.lj3*r6inv-a.lj4); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (tmp_coef2 + 2.0*rsq) * tmp_coef1; - evdwl *= switch1; - } - evdwl *= factor_lj; - } else evdwl = 0.0; - } - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz); - } - } - } - - ff[i].x += tmpfx; - ff[i].y += tmpfy; - ff[i].z += tmpfz; - } - - free(fast_alpha); fast_alpha = 0; - - if (vflag_fdotr) virial_fdotr_compute(); -} - -#undef EWALD_F -#undef EWALD_P -#undef EWALD_A1 -#undef EWALD_A2 -#undef EWALD_A3 -#undef EWALD_A4 -#undef EWALD_A5 - -#undef MIN -#undef MAX } diff --git a/src/OPT/pair_lj_cut_opt.cpp b/src/OPT/pair_lj_cut_opt.cpp index d939727a59..8f855da566 100644 --- a/src/OPT/pair_lj_cut_opt.cpp +++ b/src/OPT/pair_lj_cut_opt.cpp @@ -18,7 +18,11 @@ Vincent Natoli, Stone Ridge Technology ------------------------------------------------------------------------- */ +#include "stdlib.h" #include "pair_lj_cut_opt.h" +#include "atom.h" +#include "force.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -46,3 +50,152 @@ void PairLJCutOpt::compute(int eflag, int vflag) else return eval<0,0,0>(); } } + +/* ---------------------------------------------------------------------- */ + +template < int EVFLAG, int EFLAG, int NEWTON_PAIR > +void PairLJCutOpt::eval() +{ + typedef struct { double x,y,z; } vec3_t; + + typedef struct { + double cutsq,lj1,lj2,lj3,lj4,offset; + double _pad[2]; + } fast_alpha_t; + + int i,j,ii,jj,inum,jnum,itype,jtype,sbindex; + double factor_lj; + double evdwl = 0.0; + + double** __restrict__ x = atom->x; + double** __restrict__ f = atom->f; + int* __restrict__ type = atom->type; + int nlocal = atom->nlocal; + double* __restrict__ special_lj = force->special_lj; + + inum = list->inum; + int* __restrict__ ilist = list->ilist; + int** __restrict__ firstneigh = list->firstneigh; + int* __restrict__ numneigh = list->numneigh; + + vec3_t* __restrict__ xx = (vec3_t*)x[0]; + vec3_t* __restrict__ ff = (vec3_t*)f[0]; + + int ntypes = atom->ntypes; + int ntypes2 = ntypes*ntypes; + + fast_alpha_t* __restrict__ fast_alpha = + (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t)); + for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { + fast_alpha_t& a = fast_alpha[i*ntypes+j]; + a.cutsq = cutsq[i+1][j+1]; + a.lj1 = lj1[i+1][j+1]; + a.lj2 = lj2[i+1][j+1]; + a.lj3 = lj3[i+1][j+1]; + a.lj4 = lj4[i+1][j+1]; + a.offset = offset[i+1][j+1]; + } + fast_alpha_t* __restrict__ tabsix = fast_alpha; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double xtmp = xx[i].x; + double ytmp = xx[i].y; + double ztmp = xx[i].z; + itype = type[i] - 1; + int* __restrict__ jlist = firstneigh[i]; + jnum = numneigh[i]; + + double tmpfx = 0.0; + double tmpfy = 0.0; + double tmpfz = 0.0; + + fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + sbindex = sbmask(j); + + if (sbindex == 0) { + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + jtype = type[j] - 1; + + fast_alpha_t& a = tabsixi[jtype]; + + if (rsq < a.cutsq) { + double r2inv = 1.0/rsq; + double r6inv = r2inv*r2inv*r2inv; + double forcelj = r6inv * (a.lj1*r6inv - a.lj2); + double fpair = forcelj*r2inv; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset; + + if (EVFLAG) + ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz); + } + + } else { + factor_lj = special_lj[sbindex]; + j &= NEIGHMASK; + + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + int jtype1 = type[j]; + jtype = jtype1 - 1; + + fast_alpha_t& a = tabsixi[jtype]; + if (rsq < a.cutsq) { + double r2inv = 1.0/rsq; + double r6inv = r2inv*r2inv*r2inv; + fast_alpha_t& a = tabsixi[jtype]; + double forcelj = r6inv * (a.lj1*r6inv - a.lj2); + double fpair = factor_lj*forcelj*r2inv; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz); + } + } + } + + ff[i].x += tmpfx; + ff[i].y += tmpfy; + ff[i].z += tmpfz; + } + + free(fast_alpha); fast_alpha = 0; + + if (vflag_fdotr) virial_fdotr_compute(); +} diff --git a/src/OPT/pair_lj_cut_opt.h b/src/OPT/pair_lj_cut_opt.h index acd6afefeb..0bdd004811 100644 --- a/src/OPT/pair_lj_cut_opt.h +++ b/src/OPT/pair_lj_cut_opt.h @@ -11,13 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing authors: - James Fischer, High Performance Technologies, Inc. - David Richie, Stone Ridge Technology - Vincent Natol, Stone Ridge Technology -------------------------------------------------------------------------- */ - #ifdef PAIR_CLASS PairStyle(lj/cut/opt,PairLJCutOpt) @@ -27,11 +20,7 @@ PairStyle(lj/cut/opt,PairLJCutOpt) #ifndef LMP_PAIR_LJ_CUT_OPT_H #define LMP_PAIR_LJ_CUT_OPT_H -#include "stdlib.h" #include "pair_lj_cut.h" -#include "atom.h" -#include "force.h" -#include "neigh_list.h" namespace LAMMPS_NS { @@ -44,153 +33,6 @@ class PairLJCutOpt : public PairLJCut { template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval(); }; -template < int EVFLAG, int EFLAG, int NEWTON_PAIR > -void PairLJCutOpt::eval() -{ - typedef struct { double x,y,z; } vec3_t; - - typedef struct { - double cutsq,lj1,lj2,lj3,lj4,offset; - double _pad[2]; - } fast_alpha_t; - - int i,j,ii,jj,inum,jnum,itype,jtype,sbindex; - double factor_lj; - double evdwl = 0.0; - - double** __restrict__ x = atom->x; - double** __restrict__ f = atom->f; - int* __restrict__ type = atom->type; - int nlocal = atom->nlocal; - double* __restrict__ special_lj = force->special_lj; - - inum = list->inum; - int* __restrict__ ilist = list->ilist; - int** __restrict__ firstneigh = list->firstneigh; - int* __restrict__ numneigh = list->numneigh; - - vec3_t* __restrict__ xx = (vec3_t*)x[0]; - vec3_t* __restrict__ ff = (vec3_t*)f[0]; - - int ntypes = atom->ntypes; - int ntypes2 = ntypes*ntypes; - - fast_alpha_t* __restrict__ fast_alpha = - (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t)); - for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { - fast_alpha_t& a = fast_alpha[i*ntypes+j]; - a.cutsq = cutsq[i+1][j+1]; - a.lj1 = lj1[i+1][j+1]; - a.lj2 = lj2[i+1][j+1]; - a.lj3 = lj3[i+1][j+1]; - a.lj4 = lj4[i+1][j+1]; - a.offset = offset[i+1][j+1]; - } - fast_alpha_t* __restrict__ tabsix = fast_alpha; - - // loop over neighbors of my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double xtmp = xx[i].x; - double ytmp = xx[i].y; - double ztmp = xx[i].z; - itype = type[i] - 1; - int* __restrict__ jlist = firstneigh[i]; - jnum = numneigh[i]; - - double tmpfx = 0.0; - double tmpfy = 0.0; - double tmpfz = 0.0; - - fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - sbindex = sbmask(j); - - if (sbindex == 0) { - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - jtype = type[j] - 1; - - fast_alpha_t& a = tabsixi[jtype]; - - if (rsq < a.cutsq) { - double r2inv = 1.0/rsq; - double r6inv = r2inv*r2inv*r2inv; - double forcelj = r6inv * (a.lj1*r6inv - a.lj2); - double fpair = forcelj*r2inv; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset; - - if (EVFLAG) - ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz); - } - - } else { - factor_lj = special_lj[sbindex]; - j &= NEIGHMASK; - - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - int jtype1 = type[j]; - jtype = jtype1 - 1; - - fast_alpha_t& a = tabsixi[jtype]; - if (rsq < a.cutsq) { - double r2inv = 1.0/rsq; - double r6inv = r2inv*r2inv*r2inv; - fast_alpha_t& a = tabsixi[jtype]; - double forcelj = r6inv * (a.lj1*r6inv - a.lj2); - double fpair = factor_lj*forcelj*r2inv; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) { - evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset; - evdwl *= factor_lj; - } - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz); - } - } - } - - ff[i].x += tmpfx; - ff[i].y += tmpfy; - ff[i].z += tmpfz; - } - - free(fast_alpha); fast_alpha = 0; - - if (vflag_fdotr) virial_fdotr_compute(); -} - } #endif diff --git a/src/OPT/pair_morse_opt.cpp b/src/OPT/pair_morse_opt.cpp index 1cf96d85b4..5a9bfe7bed 100644 --- a/src/OPT/pair_morse_opt.cpp +++ b/src/OPT/pair_morse_opt.cpp @@ -18,7 +18,12 @@ Vincent Natoli, Stone Ridge Technology ------------------------------------------------------------------------- */ +#include "math.h" +#include "stdlib.h" #include "pair_morse_opt.h" +#include "atom.h" +#include "force.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -46,3 +51,148 @@ void PairMorseOpt::compute(int eflag, int vflag) else return eval<0,0,0>(); } } + +/* ---------------------------------------------------------------------- */ + +template < int EVFLAG, int EFLAG, int NEWTON_PAIR > +void PairMorseOpt::eval() +{ + typedef struct { double x,y,z; } vec3_t; + + typedef struct { + double cutsq,r0,alpha,morse1,d0,offset; + double _pad[2]; + } fast_alpha_t; + + int i,j,ii,jj,inum,jnum,itype,jtype,sbindex; + double factor_lj; + double evdwl = 0.0; + + double** __restrict__ x = atom->x; + double** __restrict__ f = atom->f; + int* __restrict__ type = atom->type; + int nlocal = atom->nlocal; + double* __restrict__ special_lj = force->special_lj; + + inum = list->inum; + int* __restrict__ ilist = list->ilist; + int** __restrict__ firstneigh = list->firstneigh; + int* __restrict__ numneigh = list->numneigh; + + vec3_t* __restrict__ xx = (vec3_t*)x[0]; + vec3_t* __restrict__ ff = (vec3_t*)f[0]; + + int ntypes = atom->ntypes; + int ntypes2 = ntypes*ntypes; + + fast_alpha_t* __restrict__ fast_alpha = + (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t)); + for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { + fast_alpha_t& a = fast_alpha[i*ntypes+j]; + a.cutsq = cutsq[i+1][j+1]; + a.r0 = r0[i+1][j+1]; + a.alpha = alpha[i+1][j+1]; + a.morse1 = morse1[i+1][j+1]; + a.d0 = d0[i+1][j+1]; + a.offset = offset[i+1][j+1]; + } + fast_alpha_t* __restrict__ tabsix = fast_alpha; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + double xtmp = xx[i].x; + double ytmp = xx[i].y; + double ztmp = xx[i].z; + itype = type[i] - 1; + int* __restrict__ jlist = firstneigh[i]; + jnum = numneigh[i]; + + double tmpfx = 0.0; + double tmpfy = 0.0; + double tmpfz = 0.0; + + fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + sbindex = sbmask(j); + + if (sbindex == 0) { + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + jtype = type[j] - 1; + + fast_alpha_t& a = tabsixi[jtype]; + if (rsq < a.cutsq) { + double r = sqrt(rsq); + double dr = r - a.r0; + double dexp = exp(-a.alpha * dr); + double fpair = a.morse1 * (dexp*dexp - dexp) / r; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset; + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz); + } + + } else { + factor_lj = special_lj[sbindex]; + j &= NEIGHMASK; + + double delx = xtmp - xx[j].x; + double dely = ytmp - xx[j].y; + double delz = ztmp - xx[j].z; + double rsq = delx*delx + dely*dely + delz*delz; + + jtype = type[j] - 1; + + fast_alpha_t& a = tabsixi[jtype]; + if (rsq < a.cutsq) { + double r = sqrt(rsq); + double dr = r - a.r0; + double dexp = exp(-a.alpha * dr); + double fpair = factor_lj * a.morse1 * (dexp*dexp - dexp) / r; + + tmpfx += delx*fpair; + tmpfy += dely*fpair; + tmpfz += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + ff[j].x -= delx*fpair; + ff[j].y -= dely*fpair; + ff[j].z -= delz*fpair; + } + + if (EFLAG) { + evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz); + } + } + } + + ff[i].x += tmpfx; + ff[i].y += tmpfy; + ff[i].z += tmpfz; + } + + free(fast_alpha); fast_alpha = 0; + + if (vflag_fdotr) virial_fdotr_compute(); +} diff --git a/src/OPT/pair_morse_opt.h b/src/OPT/pair_morse_opt.h index 3d1ea371c6..0e80ac4d74 100644 --- a/src/OPT/pair_morse_opt.h +++ b/src/OPT/pair_morse_opt.h @@ -11,13 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing authors: - James Fischer, High Performance Technologies, Inc. - David Richie, Stone Ridge Technology - Vincent Natol, Stone Ridge Technology -------------------------------------------------------------------------- */ - #ifdef PAIR_CLASS PairStyle(morse/opt,PairMorseOpt) @@ -27,12 +20,7 @@ PairStyle(morse/opt,PairMorseOpt) #ifndef LMP_PAIR_MORSE_OPT_H #define LMP_PAIR_MORSE_OPT_H -#include "math.h" -#include "stdlib.h" #include "pair_morse.h" -#include "atom.h" -#include "force.h" -#include "neigh_list.h" namespace LAMMPS_NS { @@ -45,149 +33,6 @@ class PairMorseOpt : public PairMorse { template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval(); }; -template < int EVFLAG, int EFLAG, int NEWTON_PAIR > -void PairMorseOpt::eval() -{ - typedef struct { double x,y,z; } vec3_t; - - typedef struct { - double cutsq,r0,alpha,morse1,d0,offset; - double _pad[2]; - } fast_alpha_t; - - int i,j,ii,jj,inum,jnum,itype,jtype,sbindex; - double factor_lj; - double evdwl = 0.0; - - double** __restrict__ x = atom->x; - double** __restrict__ f = atom->f; - int* __restrict__ type = atom->type; - int nlocal = atom->nlocal; - double* __restrict__ special_lj = force->special_lj; - - inum = list->inum; - int* __restrict__ ilist = list->ilist; - int** __restrict__ firstneigh = list->firstneigh; - int* __restrict__ numneigh = list->numneigh; - - vec3_t* __restrict__ xx = (vec3_t*)x[0]; - vec3_t* __restrict__ ff = (vec3_t*)f[0]; - - int ntypes = atom->ntypes; - int ntypes2 = ntypes*ntypes; - - fast_alpha_t* __restrict__ fast_alpha = - (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t)); - for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) { - fast_alpha_t& a = fast_alpha[i*ntypes+j]; - a.cutsq = cutsq[i+1][j+1]; - a.r0 = r0[i+1][j+1]; - a.alpha = alpha[i+1][j+1]; - a.morse1 = morse1[i+1][j+1]; - a.d0 = d0[i+1][j+1]; - a.offset = offset[i+1][j+1]; - } - fast_alpha_t* __restrict__ tabsix = fast_alpha; - - // loop over neighbors of my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - double xtmp = xx[i].x; - double ytmp = xx[i].y; - double ztmp = xx[i].z; - itype = type[i] - 1; - int* __restrict__ jlist = firstneigh[i]; - jnum = numneigh[i]; - - double tmpfx = 0.0; - double tmpfy = 0.0; - double tmpfz = 0.0; - - fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - sbindex = sbmask(j); - - if (sbindex == 0) { - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - jtype = type[j] - 1; - - fast_alpha_t& a = tabsixi[jtype]; - if (rsq < a.cutsq) { - double r = sqrt(rsq); - double dr = r - a.r0; - double dexp = exp(-a.alpha * dr); - double fpair = a.morse1 * (dexp*dexp - dexp) / r; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset; - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz); - } - - } else { - factor_lj = special_lj[sbindex]; - j &= NEIGHMASK; - - double delx = xtmp - xx[j].x; - double dely = ytmp - xx[j].y; - double delz = ztmp - xx[j].z; - double rsq = delx*delx + dely*dely + delz*delz; - - jtype = type[j] - 1; - - fast_alpha_t& a = tabsixi[jtype]; - if (rsq < a.cutsq) { - double r = sqrt(rsq); - double dr = r - a.r0; - double dexp = exp(-a.alpha * dr); - double fpair = factor_lj * a.morse1 * (dexp*dexp - dexp) / r; - - tmpfx += delx*fpair; - tmpfy += dely*fpair; - tmpfz += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - ff[j].x -= delx*fpair; - ff[j].y -= dely*fpair; - ff[j].z -= delz*fpair; - } - - if (EFLAG) { - evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset; - evdwl *= factor_lj; - } - - if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz); - } - } - } - - ff[i].x += tmpfx; - ff[i].y += tmpfy; - ff[i].z += tmpfz; - } - - free(fast_alpha); fast_alpha = 0; - - if (vflag_fdotr) virial_fdotr_compute(); -} - } #endif