git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6760 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2011-08-23 17:07:41 +00:00
parent 1490fa2606
commit 301dad125f
12 changed files with 919 additions and 939 deletions

View File

@ -11,6 +11,14 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
Charles Cornwell, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "pair_eam_alloy_opt.h"
using namespace LAMMPS_NS;
@ -18,6 +26,8 @@ using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
multiple inheritance from two parent classes
invoke constructor of grandparent class, then of each parent
inherit optimized compute() from PairEAMOpt
inherit everything else from PairEAMAlloy
------------------------------------------------------------------------- */
PairEAMAlloyOpt::PairEAMAlloyOpt(LAMMPS *lmp) :

View File

@ -25,10 +25,6 @@ PairStyle(eam/alloy/opt,PairEAMAlloyOpt)
namespace LAMMPS_NS {
// multiple inheritance from two parent classes
// optimized compute() from PairEAMOpt
// everything else from PairEAMAlloy
class PairEAMAlloyOpt : public PairEAMAlloy, public PairEAMOpt {
public:
PairEAMAlloyOpt(class LAMMPS *);

View File

@ -11,6 +11,14 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
Charles Cornwell, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "pair_eam_fs_opt.h"
using namespace LAMMPS_NS;
@ -18,6 +26,8 @@ using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
multiple inheritance from two parent classes
invoke constructor of grandparent class, then of each parent
inherit optimized compute() from PairEAMOpt
inherit everything else from PairEAMFS
------------------------------------------------------------------------- */
PairEAMFSOpt::PairEAMFSOpt(LAMMPS *lmp) :

View File

@ -25,10 +25,6 @@ PairStyle(eam/fs/opt,PairEAMFSOpt)
namespace LAMMPS_NS {
// multiple inheritance from two parent classes
// optimized compute() from PairEAMOpt
// everything else from PairEAMFS
class PairEAMFSOpt : public PairEAMFS, public PairEAMOpt {
public:
PairEAMFSOpt(class LAMMPS *);
@ -37,5 +33,4 @@ class PairEAMFSOpt : public PairEAMFS, public PairEAMOpt {
}
#endif
#endif

View File

@ -19,10 +19,20 @@
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "math.h"
#include "stdlib.h"
#include "pair_eam_opt.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neigh_list.h"
#include "memory.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairEAMOpt::PairEAMOpt(LAMMPS *lmp) : PairEAM(lmp) {}
@ -47,3 +57,295 @@ void PairEAMOpt::compute(int eflag, int vflag)
else return eval<0,0,0>();
}
}
/* ---------------------------------------------------------------------- */
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairEAMOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double rhor0i,rhor1i,rhor2i,rhor3i;
double rhor0j,rhor1j,rhor2j,rhor3j;
} fast_alpha_t;
typedef struct {
double frho0,frho1,frho2,frho3,frho4,frho5,frho6;
double _pad[1];
} fast_beta_t;
typedef struct {
double rhor4i,rhor5i,rhor6i;
double rhor4j,rhor5j,rhor6j;
double z2r0,z2r1,z2r2,z2r3,z2r4,z2r5,z2r6;
double _pad[3];
} fast_gamma_t;
int i,j,ii,jj,inum,jnum,itype,jtype;
double evdwl = 0.0;
double* __restrict__ coeff;
// grow energy array if necessary
if (atom->nmax > nmax) {
memory->sfree(rho);
memory->sfree(fp);
nmax = atom->nmax;
rho = (double *) memory->smalloc(nmax*sizeof(double),"pair:rho");
fp = (double *) memory->smalloc(nmax*sizeof(double),"pair:fp");
}
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
double tmp_cutforcesq = cutforcesq;
double tmp_rdr = rdr;
int nr2 = nr-2;
int nr1 = nr-1;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*(nr+1)*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t* __restrict__ tab = &fast_alpha[i*ntypes*nr+j*nr];
if (type2rhor[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor0i = rhor_spline[type2rhor[i+1][j+1]][m][6];
tab[m].rhor1i = rhor_spline[type2rhor[i+1][j+1]][m][5];
tab[m].rhor2i = rhor_spline[type2rhor[i+1][j+1]][m][4];
tab[m].rhor3i = rhor_spline[type2rhor[i+1][j+1]][m][3];
}
}
if (type2rhor[j+1][i+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor0j = rhor_spline[type2rhor[j+1][i+1]][m][6];
tab[m].rhor1j = rhor_spline[type2rhor[j+1][i+1]][m][5];
tab[m].rhor2j = rhor_spline[type2rhor[j+1][i+1]][m][4];
tab[m].rhor3j = rhor_spline[type2rhor[j+1][i+1]][m][3];
}
}
}
fast_alpha_t* __restrict__ tabeight = fast_alpha;
fast_gamma_t* __restrict__ fast_gamma =
(fast_gamma_t*) malloc(ntypes2*(nr+1)*sizeof(fast_gamma_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_gamma_t* __restrict__ tab = &fast_gamma[i*ntypes*nr+j*nr];
if (type2rhor[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor4i = rhor_spline[type2rhor[i+1][j+1]][m][2];
tab[m].rhor5i = rhor_spline[type2rhor[i+1][j+1]][m][1];
tab[m].rhor6i = rhor_spline[type2rhor[i+1][j+1]][m][0];
}
}
if (type2rhor[j+1][i+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor4j = rhor_spline[type2rhor[j+1][i+1]][m][2];
tab[m].rhor5j = rhor_spline[type2rhor[j+1][i+1]][m][1];
tab[m].rhor6j = rhor_spline[type2rhor[j+1][i+1]][m][0];
tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0];
}
}
if (type2z2r[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].z2r0 = z2r_spline[type2z2r[i+1][j+1]][m][6];
tab[m].z2r1 = z2r_spline[type2z2r[i+1][j+1]][m][5];
tab[m].z2r2 = z2r_spline[type2z2r[i+1][j+1]][m][4];
tab[m].z2r3 = z2r_spline[type2z2r[i+1][j+1]][m][3];
tab[m].z2r4 = z2r_spline[type2z2r[i+1][j+1]][m][2];
tab[m].z2r5 = z2r_spline[type2z2r[i+1][j+1]][m][1];
tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0];
}
}
}
fast_gamma_t* __restrict__ tabss = fast_gamma;
// zero out density
if (NEWTON_PAIR) {
int m = nlocal + atom->nghost;
for (i = 0; i < m; i++) rho[i] = 0.0;
} else for (i = 0; i < nlocal; i++) rho[i] = 0.0;
// rho = density at each atom
// loop over neighbors of my atoms
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmprho = rho[i];
fast_alpha_t* __restrict__ tabeighti = &tabeight[itype*ntypes*nr];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
if (rsq < tmp_cutforcesq) {
jtype = type[j] - 1;
double p = sqrt(rsq)*tmp_rdr;
if ( (int)p <= nr2 ) {
int m = (int)p + 1;
p -= (double)((int)p);
fast_alpha_t& a = tabeighti[jtype*nr+m];
tmprho += ((a.rhor3j*p+a.rhor2j)*p+a.rhor1j)*p+a.rhor0j;
if (NEWTON_PAIR || j < nlocal) {
rho[j] += ((a.rhor3i*p+a.rhor2i)*p+a.rhor1i)*p+a.rhor0i;
}
} else {
fast_alpha_t& a = tabeighti[jtype*nr+nr1];
tmprho += a.rhor3j+a.rhor2j+a.rhor1j+a.rhor0j;
if (NEWTON_PAIR || j < nlocal) {
rho[j] += a.rhor3i+a.rhor2i+a.rhor1i+a.rhor0i;
}
}
}
}
rho[i] = tmprho;
}
// communicate and sum densities
if (NEWTON_PAIR) comm->reverse_comm_pair(this);
// fp = derivative of embedding energy at each atom
// phi = embedding energy at each atom
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double p = rho[i]*rdrho;
int m = MIN((int)p,nrho-2);
p -= (double)m;
++m;
coeff = frho_spline[type2frho[type[i]]][m];
fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
if (EFLAG) {
double phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
if (eflag_global) eng_vdwl += phi;
if (eflag_atom) eatom[i] += phi;
}
}
// communicate derivative of embedding function
comm->forward_comm_pair(this);
// compute forces on each atom
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
int itype1 = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_gamma_t* __restrict__ tabssi = &tabss[itype1*ntypes*nr];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
if (rsq < tmp_cutforcesq) {
jtype = type[j] - 1;
double r = sqrt(rsq);
double rhoip,rhojp,z2,z2p;
double p = r*tmp_rdr;
if ( (int)p <= nr2 ) {
int m = (int) p + 1;
p -= (double)((int) p);
fast_gamma_t& a = tabssi[jtype*nr+m];
rhoip = (a.rhor6i*p + a.rhor5i)*p + a.rhor4i;
rhojp = (a.rhor6j*p + a.rhor5j)*p + a.rhor4j;
z2 = ((a.z2r3*p + a.z2r2)*p + a.z2r1)*p + a.z2r0;
z2p = (a.z2r6*p + a.z2r5)*p + a.z2r4;
} else {
fast_gamma_t& a = tabssi[jtype*nr+nr1];
rhoip = a.rhor6i + a.rhor5i + a.rhor4i;
rhojp = a.rhor6j + a.rhor5j + a.rhor4j;
z2 = a.z2r3 + a.z2r2 + a.z2r1 + a.z2r0;
z2p = a.z2r6 + a.z2r5 + a.z2r4;
}
// rhoip = derivative of (density at atom j due to atom i)
// rhojp = derivative of (density at atom i due to atom j)
// phi = pair potential energy
// phip = phi'
// z2 = phi * r
// z2p = (phi * r)' = (phi' r) + phi
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
double recip = 1.0/r;
double phi = z2*recip;
double phip = z2p*recip - phi*recip;
double psip = fp[i]*rhojp + fp[j]*rhoip + phip;
double fpair = -psip*recip;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = phi;
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
free(fast_gamma); fast_gamma = 0;
if (vflag_fdotr) virial_fdotr_compute();
}

View File

@ -11,14 +11,6 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
Charles Cornwell, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natol, Stone Ridge Technology
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/opt,PairEAMOpt)
@ -28,17 +20,7 @@ PairStyle(eam/opt,PairEAMOpt)
#ifndef LMP_PAIR_EAM_OPT_H
#define LMP_PAIR_EAM_OPT_H
#include "math.h"
#include "stdlib.h"
#include "pair_eam.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neigh_list.h"
#include "memory.h"
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
namespace LAMMPS_NS {
@ -54,296 +36,6 @@ class PairEAMOpt : virtual public PairEAM {
template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();
};
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairEAMOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double rhor0i,rhor1i,rhor2i,rhor3i;
double rhor0j,rhor1j,rhor2j,rhor3j;
} fast_alpha_t;
typedef struct {
double frho0,frho1,frho2,frho3,frho4,frho5,frho6;
double _pad[1];
} fast_beta_t;
typedef struct {
double rhor4i,rhor5i,rhor6i;
double rhor4j,rhor5j,rhor6j;
double z2r0,z2r1,z2r2,z2r3,z2r4,z2r5,z2r6;
double _pad[3];
} fast_gamma_t;
int i,j,ii,jj,inum,jnum,itype,jtype;
double evdwl = 0.0;
double* __restrict__ coeff;
// grow energy array if necessary
if (atom->nmax > nmax) {
memory->sfree(rho);
memory->sfree(fp);
nmax = atom->nmax;
rho = (double *) memory->smalloc(nmax*sizeof(double),"pair:rho");
fp = (double *) memory->smalloc(nmax*sizeof(double),"pair:fp");
}
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
double tmp_cutforcesq = cutforcesq;
double tmp_rdr = rdr;
int nr2 = nr-2;
int nr1 = nr-1;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*(nr+1)*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t* __restrict__ tab = &fast_alpha[i*ntypes*nr+j*nr];
if (type2rhor[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor0i = rhor_spline[type2rhor[i+1][j+1]][m][6];
tab[m].rhor1i = rhor_spline[type2rhor[i+1][j+1]][m][5];
tab[m].rhor2i = rhor_spline[type2rhor[i+1][j+1]][m][4];
tab[m].rhor3i = rhor_spline[type2rhor[i+1][j+1]][m][3];
}
}
if (type2rhor[j+1][i+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor0j = rhor_spline[type2rhor[j+1][i+1]][m][6];
tab[m].rhor1j = rhor_spline[type2rhor[j+1][i+1]][m][5];
tab[m].rhor2j = rhor_spline[type2rhor[j+1][i+1]][m][4];
tab[m].rhor3j = rhor_spline[type2rhor[j+1][i+1]][m][3];
}
}
}
fast_alpha_t* __restrict__ tabeight = fast_alpha;
fast_gamma_t* __restrict__ fast_gamma =
(fast_gamma_t*) malloc(ntypes2*(nr+1)*sizeof(fast_gamma_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_gamma_t* __restrict__ tab = &fast_gamma[i*ntypes*nr+j*nr];
if (type2rhor[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor4i = rhor_spline[type2rhor[i+1][j+1]][m][2];
tab[m].rhor5i = rhor_spline[type2rhor[i+1][j+1]][m][1];
tab[m].rhor6i = rhor_spline[type2rhor[i+1][j+1]][m][0];
}
}
if (type2rhor[j+1][i+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].rhor4j = rhor_spline[type2rhor[j+1][i+1]][m][2];
tab[m].rhor5j = rhor_spline[type2rhor[j+1][i+1]][m][1];
tab[m].rhor6j = rhor_spline[type2rhor[j+1][i+1]][m][0];
tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0];
}
}
if (type2z2r[i+1][j+1] >= 0) {
for(int m = 1; m <= nr; m++) {
tab[m].z2r0 = z2r_spline[type2z2r[i+1][j+1]][m][6];
tab[m].z2r1 = z2r_spline[type2z2r[i+1][j+1]][m][5];
tab[m].z2r2 = z2r_spline[type2z2r[i+1][j+1]][m][4];
tab[m].z2r3 = z2r_spline[type2z2r[i+1][j+1]][m][3];
tab[m].z2r4 = z2r_spline[type2z2r[i+1][j+1]][m][2];
tab[m].z2r5 = z2r_spline[type2z2r[i+1][j+1]][m][1];
tab[m].z2r6 = z2r_spline[type2z2r[i+1][j+1]][m][0];
}
}
}
fast_gamma_t* __restrict__ tabss = fast_gamma;
// zero out density
if (NEWTON_PAIR) {
int m = nlocal + atom->nghost;
for (i = 0; i < m; i++) rho[i] = 0.0;
} else for (i = 0; i < nlocal; i++) rho[i] = 0.0;
// rho = density at each atom
// loop over neighbors of my atoms
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmprho = rho[i];
fast_alpha_t* __restrict__ tabeighti = &tabeight[itype*ntypes*nr];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
if (rsq < tmp_cutforcesq) {
jtype = type[j] - 1;
double p = sqrt(rsq)*tmp_rdr;
if ( (int)p <= nr2 ) {
int m = (int)p + 1;
p -= (double)((int)p);
fast_alpha_t& a = tabeighti[jtype*nr+m];
tmprho += ((a.rhor3j*p+a.rhor2j)*p+a.rhor1j)*p+a.rhor0j;
if (NEWTON_PAIR || j < nlocal) {
rho[j] += ((a.rhor3i*p+a.rhor2i)*p+a.rhor1i)*p+a.rhor0i;
}
} else {
fast_alpha_t& a = tabeighti[jtype*nr+nr1];
tmprho += a.rhor3j+a.rhor2j+a.rhor1j+a.rhor0j;
if (NEWTON_PAIR || j < nlocal) {
rho[j] += a.rhor3i+a.rhor2i+a.rhor1i+a.rhor0i;
}
}
}
}
rho[i] = tmprho;
}
// communicate and sum densities
if (NEWTON_PAIR) comm->reverse_comm_pair(this);
// fp = derivative of embedding energy at each atom
// phi = embedding energy at each atom
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double p = rho[i]*rdrho;
int m = MIN((int)p,nrho-2);
p -= (double)m;
++m;
coeff = frho_spline[type2frho[type[i]]][m];
fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
if (EFLAG) {
double phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
if (eflag_global) eng_vdwl += phi;
if (eflag_atom) eatom[i] += phi;
}
}
// communicate derivative of embedding function
comm->forward_comm_pair(this);
// compute forces on each atom
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
int itype1 = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_gamma_t* __restrict__ tabssi = &tabss[itype1*ntypes*nr];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
if (rsq < tmp_cutforcesq) {
jtype = type[j] - 1;
double r = sqrt(rsq);
double rhoip,rhojp,z2,z2p;
double p = r*tmp_rdr;
if ( (int)p <= nr2 ) {
int m = (int) p + 1;
p -= (double)((int) p);
fast_gamma_t& a = tabssi[jtype*nr+m];
rhoip = (a.rhor6i*p + a.rhor5i)*p + a.rhor4i;
rhojp = (a.rhor6j*p + a.rhor5j)*p + a.rhor4j;
z2 = ((a.z2r3*p + a.z2r2)*p + a.z2r1)*p + a.z2r0;
z2p = (a.z2r6*p + a.z2r5)*p + a.z2r4;
} else {
fast_gamma_t& a = tabssi[jtype*nr+nr1];
rhoip = a.rhor6i + a.rhor5i + a.rhor4i;
rhojp = a.rhor6j + a.rhor5j + a.rhor4j;
z2 = a.z2r3 + a.z2r2 + a.z2r1 + a.z2r0;
z2p = a.z2r6 + a.z2r5 + a.z2r4;
}
// rhoip = derivative of (density at atom j due to atom i)
// rhojp = derivative of (density at atom i due to atom j)
// phi = pair potential energy
// phip = phi'
// z2 = phi * r
// z2p = (phi * r)' = (phi' r) + phi
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
double recip = 1.0/r;
double phi = z2*recip;
double phip = z2p*recip - phi*recip;
double psip = fp[i]*rhojp + fp[j]*rhoip + phip;
double fpair = -psip*recip;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = phi;
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
free(fast_gamma); fast_gamma = 0;
if (vflag_fdotr) virial_fdotr_compute();
}
}
#endif

View File

@ -18,10 +18,26 @@
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "math.h"
#include "stdlib.h"
#include "pair_lj_charmm_coul_long_opt.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define EWALD_A1 0.254829592
#define EWALD_A2 -0.284496736
#define EWALD_A3 1.421413741
#define EWALD_A4 -1.453152027
#define EWALD_A5 1.061405429
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulLongOpt::PairLJCharmmCoulLongOpt(LAMMPS *lmp) :
@ -47,3 +63,281 @@ void PairLJCharmmCoulLongOpt::compute(int eflag, int vflag)
else return eval<0,0,0>();
}
}
/* ---------------------------------------------------------------------- */
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairLJCharmmCoulLongOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,lj1,lj2,lj3,lj4,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,itable,sbindex;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
double philj,switch1,switch2;
double rsq;
double evdwl = 0.0;
double ecoul = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
double* __restrict__ q = atom->q;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_coul = force->special_coul;
double* __restrict__ special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
double tmp_coef1 = 1.0/denom_lj;
double tmp_coef2 = cut_ljsq - 3.0*cut_lj_innersq;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*)malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.lj1 = lj1[i+1][j+1];
a.lj2 = lj2[i+1][j+1];
a.lj3 = lj3[i+1][j+1];
a.lj4 = lj4[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double qtmp = q[i];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
rsq = delx*delx + dely*dely + delz*delz;
double tmp_coef3 = qtmp*q[j];
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
forcecoul = 0.0;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t *
(EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) *
expm2;
prefactor = qqrd2e * tmp_coef3/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = tmp_coef3 * table;
}
}
forcelj = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
forcelj = r6inv * (a.lj1*r6inv - a.lj2);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) * tmp_coef1;
philj = r6inv * (a.lj3*r6inv - a.lj4);
forcelj = forcelj*switch1 + philj*switch2;
}
}
double fpair = (forcecoul + forcelj) * r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = tmp_coef3 * table;
}
} else ecoul = 0.0;
if (rsq < cut_ljsq) {
fast_alpha_t& a = tabsixi[jtype];
evdwl = r6inv*(a.lj3*r6inv-a.lj4);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
evdwl *= switch1;
}
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
factor_coul = special_coul[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
rsq = delx*delx + dely*dely + delz*delz;
double tmp_coef3 = qtmp*q[j];
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
forcecoul = 0.0;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t *
(EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) *
expm2;
prefactor = qqrd2e * tmp_coef3/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) {
forcecoul -= (1.0-factor_coul)*prefactor;
}
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = tmp_coef3 * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = tmp_coef3 * table;
forcecoul -= (1.0-factor_coul)*prefactor;
}
}
}
forcelj = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
forcelj = r6inv * (a.lj1*r6inv - a.lj2);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) * tmp_coef1;
fast_alpha_t& a = tabsixi[jtype];
philj = r6inv * (a.lj3*r6inv - a.lj4);
forcelj = forcelj*switch1 + philj*switch2;
}
}
double fpair = (forcecoul + factor_lj*forcelj) * r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = tmp_coef3 * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq) {
fast_alpha_t& a = tabsixi[jtype];
evdwl = r6inv*(a.lj3*r6inv-a.lj4);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
evdwl *= switch1;
}
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}

View File

@ -11,13 +11,6 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natol, Stone Ridge Technology
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/long/opt,PairLJCharmmCoulLongOpt)
@ -27,26 +20,10 @@ PairStyle(lj/charmm/coul/long/opt,PairLJCharmmCoulLongOpt)
#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_OPT_H
#define LMP_PAIR_LJ_CHARMM_COUL_LONG_OPT_H
#include "math.h"
#include "stdlib.h"
#include "pair_lj_charmm_coul_long.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
namespace LAMMPS_NS {
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define EWALD_A1 0.254829592
#define EWALD_A2 -0.284496736
#define EWALD_A3 1.421413741
#define EWALD_A4 -1.453152027
#define EWALD_A5 1.061405429
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
class PairLJCharmmCoulLongOpt : public PairLJCharmmCoulLong {
public:
PairLJCharmmCoulLongOpt(class LAMMPS *);
@ -55,292 +32,6 @@ class PairLJCharmmCoulLongOpt : public PairLJCharmmCoulLong {
private:
template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();
};
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairLJCharmmCoulLongOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,lj1,lj2,lj3,lj4,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,itable,sbindex;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
double philj,switch1,switch2;
double rsq;
double evdwl = 0.0;
double ecoul = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
double* __restrict__ q = atom->q;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_coul = force->special_coul;
double* __restrict__ special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
double tmp_coef1 = 1.0/denom_lj;
double tmp_coef2 = cut_ljsq - 3.0*cut_lj_innersq;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*)malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.lj1 = lj1[i+1][j+1];
a.lj2 = lj2[i+1][j+1];
a.lj3 = lj3[i+1][j+1];
a.lj4 = lj4[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double qtmp = q[i];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
rsq = delx*delx + dely*dely + delz*delz;
double tmp_coef3 = qtmp*q[j];
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
forcecoul = 0.0;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t *
(EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) *
expm2;
prefactor = qqrd2e * tmp_coef3/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = tmp_coef3 * table;
}
}
forcelj = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
forcelj = r6inv * (a.lj1*r6inv - a.lj2);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) * tmp_coef1;
philj = r6inv * (a.lj3*r6inv - a.lj4);
forcelj = forcelj*switch1 + philj*switch2;
}
}
double fpair = (forcecoul + forcelj) * r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = tmp_coef3 * table;
}
} else ecoul = 0.0;
if (rsq < cut_ljsq) {
fast_alpha_t& a = tabsixi[jtype];
evdwl = r6inv*(a.lj3*r6inv-a.lj4);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
evdwl *= switch1;
}
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
factor_coul = special_coul[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
rsq = delx*delx + dely*dely + delz*delz;
double tmp_coef3 = qtmp*q[j];
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
forcecoul = 0.0;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t *
(EWALD_A1+t*(EWALD_A2+t*(EWALD_A3+t*(EWALD_A4+t*EWALD_A5)))) *
expm2;
prefactor = qqrd2e * tmp_coef3/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) {
forcecoul -= (1.0-factor_coul)*prefactor;
}
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = tmp_coef3 * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = tmp_coef3 * table;
forcecoul -= (1.0-factor_coul)*prefactor;
}
}
}
forcelj = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
forcelj = r6inv * (a.lj1*r6inv - a.lj2);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) * tmp_coef1;
fast_alpha_t& a = tabsixi[jtype];
philj = r6inv * (a.lj3*r6inv - a.lj4);
forcelj = forcelj*switch1 + philj*switch2;
}
}
double fpair = (forcecoul + factor_lj*forcelj) * r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = tmp_coef3 * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq) {
fast_alpha_t& a = tabsixi[jtype];
evdwl = r6inv*(a.lj3*r6inv-a.lj4);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(tmp_coef2 + 2.0*rsq) * tmp_coef1;
evdwl *= switch1;
}
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}
#undef EWALD_F
#undef EWALD_P
#undef EWALD_A1
#undef EWALD_A2
#undef EWALD_A3
#undef EWALD_A4
#undef EWALD_A5
#undef MIN
#undef MAX
}

View File

@ -18,7 +18,11 @@
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "stdlib.h"
#include "pair_lj_cut_opt.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
@ -46,3 +50,152 @@ void PairLJCutOpt::compute(int eflag, int vflag)
else return eval<0,0,0>();
}
}
/* ---------------------------------------------------------------------- */
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairLJCutOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,lj1,lj2,lj3,lj4,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
double factor_lj;
double evdwl = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_lj = force->special_lj;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.lj1 = lj1[i+1][j+1];
a.lj2 = lj2[i+1][j+1];
a.lj3 = lj3[i+1][j+1];
a.lj4 = lj4[i+1][j+1];
a.offset = offset[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r2inv = 1.0/rsq;
double r6inv = r2inv*r2inv*r2inv;
double forcelj = r6inv * (a.lj1*r6inv - a.lj2);
double fpair = forcelj*r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset;
if (EVFLAG)
ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
int jtype1 = type[j];
jtype = jtype1 - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r2inv = 1.0/rsq;
double r6inv = r2inv*r2inv*r2inv;
fast_alpha_t& a = tabsixi[jtype];
double forcelj = r6inv * (a.lj1*r6inv - a.lj2);
double fpair = factor_lj*forcelj*r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset;
evdwl *= factor_lj;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}

View File

@ -11,13 +11,6 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natol, Stone Ridge Technology
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/opt,PairLJCutOpt)
@ -27,11 +20,7 @@ PairStyle(lj/cut/opt,PairLJCutOpt)
#ifndef LMP_PAIR_LJ_CUT_OPT_H
#define LMP_PAIR_LJ_CUT_OPT_H
#include "stdlib.h"
#include "pair_lj_cut.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
namespace LAMMPS_NS {
@ -44,153 +33,6 @@ class PairLJCutOpt : public PairLJCut {
template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();
};
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairLJCutOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,lj1,lj2,lj3,lj4,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
double factor_lj;
double evdwl = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_lj = force->special_lj;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.lj1 = lj1[i+1][j+1];
a.lj2 = lj2[i+1][j+1];
a.lj3 = lj3[i+1][j+1];
a.lj4 = lj4[i+1][j+1];
a.offset = offset[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r2inv = 1.0/rsq;
double r6inv = r2inv*r2inv*r2inv;
double forcelj = r6inv * (a.lj1*r6inv - a.lj2);
double fpair = forcelj*r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset;
if (EVFLAG)
ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
int jtype1 = type[j];
jtype = jtype1 - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r2inv = 1.0/rsq;
double r6inv = r2inv*r2inv*r2inv;
fast_alpha_t& a = tabsixi[jtype];
double forcelj = r6inv * (a.lj1*r6inv - a.lj2);
double fpair = factor_lj*forcelj*r2inv;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
evdwl = r6inv*(a.lj3*r6inv-a.lj4) - a.offset;
evdwl *= factor_lj;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}
}
#endif

View File

@ -18,7 +18,12 @@
Vincent Natoli, Stone Ridge Technology
------------------------------------------------------------------------- */
#include "math.h"
#include "stdlib.h"
#include "pair_morse_opt.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
@ -46,3 +51,148 @@ void PairMorseOpt::compute(int eflag, int vflag)
else return eval<0,0,0>();
}
}
/* ---------------------------------------------------------------------- */
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairMorseOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,r0,alpha,morse1,d0,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
double factor_lj;
double evdwl = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_lj = force->special_lj;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.r0 = r0[i+1][j+1];
a.alpha = alpha[i+1][j+1];
a.morse1 = morse1[i+1][j+1];
a.d0 = d0[i+1][j+1];
a.offset = offset[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r = sqrt(rsq);
double dr = r - a.r0;
double dexp = exp(-a.alpha * dr);
double fpair = a.morse1 * (dexp*dexp - dexp) / r;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset;
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r = sqrt(rsq);
double dr = r - a.r0;
double dexp = exp(-a.alpha * dr);
double fpair = factor_lj * a.morse1 * (dexp*dexp - dexp) / r;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset;
evdwl *= factor_lj;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}

View File

@ -11,13 +11,6 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Fischer, High Performance Technologies, Inc.
David Richie, Stone Ridge Technology
Vincent Natol, Stone Ridge Technology
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(morse/opt,PairMorseOpt)
@ -27,12 +20,7 @@ PairStyle(morse/opt,PairMorseOpt)
#ifndef LMP_PAIR_MORSE_OPT_H
#define LMP_PAIR_MORSE_OPT_H
#include "math.h"
#include "stdlib.h"
#include "pair_morse.h"
#include "atom.h"
#include "force.h"
#include "neigh_list.h"
namespace LAMMPS_NS {
@ -45,149 +33,6 @@ class PairMorseOpt : public PairMorse {
template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();
};
template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
void PairMorseOpt::eval()
{
typedef struct { double x,y,z; } vec3_t;
typedef struct {
double cutsq,r0,alpha,morse1,d0,offset;
double _pad[2];
} fast_alpha_t;
int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
double factor_lj;
double evdwl = 0.0;
double** __restrict__ x = atom->x;
double** __restrict__ f = atom->f;
int* __restrict__ type = atom->type;
int nlocal = atom->nlocal;
double* __restrict__ special_lj = force->special_lj;
inum = list->inum;
int* __restrict__ ilist = list->ilist;
int** __restrict__ firstneigh = list->firstneigh;
int* __restrict__ numneigh = list->numneigh;
vec3_t* __restrict__ xx = (vec3_t*)x[0];
vec3_t* __restrict__ ff = (vec3_t*)f[0];
int ntypes = atom->ntypes;
int ntypes2 = ntypes*ntypes;
fast_alpha_t* __restrict__ fast_alpha =
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
fast_alpha_t& a = fast_alpha[i*ntypes+j];
a.cutsq = cutsq[i+1][j+1];
a.r0 = r0[i+1][j+1];
a.alpha = alpha[i+1][j+1];
a.morse1 = morse1[i+1][j+1];
a.d0 = d0[i+1][j+1];
a.offset = offset[i+1][j+1];
}
fast_alpha_t* __restrict__ tabsix = fast_alpha;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xtmp = xx[i].x;
double ytmp = xx[i].y;
double ztmp = xx[i].z;
itype = type[i] - 1;
int* __restrict__ jlist = firstneigh[i];
jnum = numneigh[i];
double tmpfx = 0.0;
double tmpfy = 0.0;
double tmpfz = 0.0;
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
sbindex = sbmask(j);
if (sbindex == 0) {
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r = sqrt(rsq);
double dr = r - a.r0;
double dexp = exp(-a.alpha * dr);
double fpair = a.morse1 * (dexp*dexp - dexp) / r;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset;
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
} else {
factor_lj = special_lj[sbindex];
j &= NEIGHMASK;
double delx = xtmp - xx[j].x;
double dely = ytmp - xx[j].y;
double delz = ztmp - xx[j].z;
double rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j] - 1;
fast_alpha_t& a = tabsixi[jtype];
if (rsq < a.cutsq) {
double r = sqrt(rsq);
double dr = r - a.r0;
double dexp = exp(-a.alpha * dr);
double fpair = factor_lj * a.morse1 * (dexp*dexp - dexp) / r;
tmpfx += delx*fpair;
tmpfy += dely*fpair;
tmpfz += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
ff[j].x -= delx*fpair;
ff[j].y -= dely*fpair;
ff[j].z -= delz*fpair;
}
if (EFLAG) {
evdwl = a.d0 * (dexp*dexp - 2.0*dexp) - a.offset;
evdwl *= factor_lj;
}
if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz);
}
}
}
ff[i].x += tmpfx;
ff[i].y += tmpfy;
ff[i].z += tmpfz;
}
free(fast_alpha); fast_alpha = 0;
if (vflag_fdotr) virial_fdotr_compute();
}
}
#endif