git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@26 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2006-09-27 19:51:33 +00:00
parent 3422cb245c
commit 222c95507e
536 changed files with 108384 additions and 0 deletions
--- a/src/CLASS2/Install.csh
+++ b/src/CLASS2/Install.csh
@ -0,0 +1,50 @@
+# Install/unInstall package classes in LAMMPS
+#   first argument 1 -> copy the package files into the parent directory
+#   first argument 0 -> remove them again, leaving an empty style_class2.h
+
+# pair_lj_class2_coul_long.h must always be in src,
+# so it is deliberately left out of the header list below
+
+set sources = (bond_class2.cpp angle_class2.cpp)
+set sources = ($sources dihedral_class2.cpp improper_class2.cpp)
+set sources = ($sources pair_lj_class2.cpp pair_lj_class2_coul_cut.cpp)
+set sources = ($sources pair_lj_class2_coul_long.cpp)
+
+set headers = (bond_class2.h angle_class2.h)
+set headers = ($headers dihedral_class2.h improper_class2.h)
+set headers = ($headers pair_lj_class2.h pair_lj_class2_coul_cut.h)
+
+if ($1 == 1) then
+
+  cp style_class2.h ..
+
+  foreach file ($sources $headers)
+    cp $file ..
+  end
+
+else if ($1 == 0) then
+
+  # replace the style header with an empty file rather than deleting it
+
+  rm ../style_class2.h
+  touch ../style_class2.h
+
+  foreach file ($sources $headers)
+    rm ../$file
+  end
+
+endif
--- a/src/CLASS2/angle_class2.cpp
+++ b/src/CLASS2/angle_class2.cpp
@ -0,0 +1,421 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Eric Simon (Cray)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "angle_class2.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+
+#define SMALL 0.001
+
+/* ----------------------------------------------------------------------
+   free all arrays 
+------------------------------------------------------------------------- */
+
+AngleClass2::~AngleClass2()
+{
+  // the arrays only exist once the allocated flag has been raised
+  if (!allocated) return;
+
+  // angle term coefficients
+  memory->sfree(theta0);
+  memory->sfree(k2);
+  memory->sfree(k3);
+  memory->sfree(k4);
+
+  // bond-bond term coefficients
+  memory->sfree(bb_k);
+  memory->sfree(bb_r1);
+  memory->sfree(bb_r2);
+
+  // bond-angle term coefficients
+  memory->sfree(ba_k1);
+  memory->sfree(ba_k2);
+  memory->sfree(ba_r1);
+  memory->sfree(ba_r2);
+
+  // per-type "coefficients have been set" flags
+  memory->sfree(setflag_a);
+  memory->sfree(setflag_bb);
+  memory->sfree(setflag_ba);
+  memory->sfree(setflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AngleClass2::compute(int eflag, int vflag)
+{
+  // Accumulates the class2 angle energy (when eflag is set), the forces on
+  // the three atoms of each angle, and the virial (when vflag is set) for
+  // every entry of neighbor->anglelist.  Three energy terms are summed:
+  //   angle:      k2*dtheta^2 + k3*dtheta^3 + k4*dtheta^4
+  //   bond-bond:  bb_k * (r1 - bb_r1) * (r2 - bb_r2)
+  //   bond-angle: ba_k1*(r1 - ba_r1)*dtheta + ba_k2*(r2 - ba_r2)*dtheta
+  // with dtheta = theta - theta0, r1 = |x[i1]-x[i2]|, r2 = |x[i3]-x[i2]|
+
+  int i1,i2,i3,n,type,factor;
+  double delx1,dely1,delz1,delx2,dely2,delz2,rfactor;
+  double dtheta,dtheta2,dtheta3,dtheta4,de_angle;
+  double dr1,dr2,tk1,tk2,aa1,aa2,aa11,aa12,aa21,aa22;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22,b1,b2,vx1,vx2,vy1,vy2,vz1,vz2;
+  double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22;
+
+  energy = 0.0;
+  if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int **anglelist = neighbor->anglelist;
+  int nanglelist = neighbor->nanglelist;
+  int nlocal = atom->nlocal;
+  int newton_bond = force->newton_bond;
+
+  for (n = 0; n < nanglelist; n++) {
+
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // rfactor = fraction of this angle's energy/virial tallied here:
+    // all of it with newton_bond on, otherwise one third per atom
+    // whose index is below nlocal
+
+    if (newton_bond) factor = 3;
+    else {
+      factor = 0;
+      if (i1 < nlocal) factor++;
+      if (i2 < nlocal) factor++;
+      if (i3 < nlocal) factor++;
+    }
+    rfactor = factor/3.0;
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(&delx1,&dely1,&delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(&delx2,&dely2,&delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+
+    // clamp round-off so that acos() and sqrt() below stay in range
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // s becomes 1/sin(theta), with sin(theta) floored at SMALL
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy for angle term
+
+    dtheta = acos(c) - theta0[type];
+    dtheta2 = dtheta*dtheta;
+    dtheta3 = dtheta2*dtheta;
+    dtheta4 = dtheta3*dtheta;
+
+    de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 + 
+      4.0*k4[type]*dtheta3;
+
+    a = de_angle*s;
+
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    vx1 = a11*delx1 + a12*delx2;
+    vy1 = a11*dely1 + a12*dely2;
+    vz1 = a11*delz1 + a12*delz2;
+
+    vx2 = a22*delx2 + a12*delx1;
+    vy2 = a22*dely2 + a12*dely1;
+    vz2 = a22*delz2 + a12*delz1;
+
+    if (eflag) energy += rfactor * 
+      (k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4);
+
+    // force & energy for bond-bond term
+
+    dr1 = r1 - bb_r1[type];
+    dr2 = r2 - bb_r2[type];
+    tk1 = bb_k[type] * dr1;
+    tk2 = bb_k[type] * dr2;
+
+    vx1 += delx1*tk2/r1;
+    vy1 += dely1*tk2/r1;
+    vz1 += delz1*tk2/r1;
+
+    vx2 += delx2*tk1/r2;
+    vy2 += dely2*tk1/r2;
+    vz2 += delz2*tk1/r2;
+
+    if (eflag) energy += rfactor * bb_k[type]*dr1*dr2;
+
+    // force & energy for bond-angle term
+    // bond displacements are re-measured from the BondAngle reference
+    // lengths ba_r1/ba_r2, not the BondBond lengths used just above
+
+    dr1 = r1 - ba_r1[type];
+    dr2 = r2 - ba_r2[type];
+
+    aa1 = s * dr1 * ba_k1[type];
+    aa2 = s * dr2 * ba_k2[type];
+
+    aa11 = aa1 * c / rsq1;
+    aa12 = -aa1 / (r1 * r2);
+    aa21 = aa2 * c / rsq1;
+    aa22 = -aa2 / (r1 * r2);
+
+    vx11 = (aa11 * delx1) + (aa12 * delx2);
+    vx12 = (aa21 * delx1) + (aa22 * delx2);
+    vy11 = (aa11 * dely1) + (aa12 * dely2);
+    vy12 = (aa21 * dely1) + (aa22 * dely2);
+    vz11 = (aa11 * delz1) + (aa12 * delz2);
+    vz12 = (aa21 * delz1) + (aa22 * delz2);
+
+    aa11 = aa1 * c / rsq2;
+    aa21 = aa2 * c / rsq2;
+
+    vx21 = (aa11 * delx2) + (aa12 * delx1);
+    vx22 = (aa21 * delx2) + (aa22 * delx1);
+    vy21 = (aa11 * dely2) + (aa12 * dely1);
+    vy22 = (aa21 * dely2) + (aa22 * dely1);
+    vz21 = (aa11 * delz2) + (aa12 * delz1);
+    vz22 = (aa21 * delz2) + (aa22 * delz1);
+
+    b1 = ba_k1[type] * dtheta / r1;
+    b2 = ba_k2[type] * dtheta / r2;
+
+    vx1 += vx11 + b1*delx1 + vx12;
+    vy1 += vy11 + b1*dely1 + vy12;
+    vz1 += vz11 + b1*delz1 + vz12;
+
+    vx2 += vx21 + b2*delx2 + vx22;
+    vy2 += vy21 + b2*dely2 + vy22;
+    vz2 += vz21 + b2*delz2 + vz22;
+
+    if (eflag) energy += rfactor * 
+		 ((ba_k1[type]*dr1*dtheta) + (ba_k2[type]*dr2*dtheta));
+
+    // apply force to each of 3 atoms
+    // with newton_bond off only atoms with index below nlocal are updated
+
+    if (newton_bond || i1 < nlocal) {
+      f[i1][0] -= vx1;
+      f[i1][1] -= vy1;
+      f[i1][2] -= vz1;
+    }
+
+    if (newton_bond || i2 < nlocal) {
+      f[i2][0] += vx1 + vx2;
+      f[i2][1] += vy1 + vy2;
+      f[i2][2] += vz1 + vz2;
+    }
+
+    if (newton_bond || i3 < nlocal) {
+      f[i3][0] -= vx2;
+      f[i3][1] -= vy2;
+      f[i3][2] -= vz2;
+    }
+
+    // virial contribution
+    // component order: xx, yy, zz, xy, xz, yz
+
+    if (vflag) {
+      virial[0] -= rfactor * (delx1*vx1 + delx2*vx2);
+      virial[1] -= rfactor * (dely1*vy1 + dely2*vy2);
+      virial[2] -= rfactor * (delz1*vz1 + delz2*vz2);
+      virial[3] -= rfactor * (delx1*vy1 + delx2*vy2);
+      virial[4] -= rfactor * (delx1*vz1 + delx2*vz2);
+      virial[5] -= rfactor * (dely1*vz1 + dely2*vz2);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AngleClass2::allocate()
+{
+  allocated = 1;
+
+  // one slot per angle type plus an extra leading slot; the code in this
+  // file only ever indexes entries 1..ntypes
+  const int ntypes = atom->nangletypes;
+  const int nslots = ntypes + 1;
+
+  // angle term
+  theta0 = (double *) memory->smalloc(nslots*sizeof(double),"angle:theta0");
+  k2 = (double *) memory->smalloc(nslots*sizeof(double),"angle:k2");
+  k3 = (double *) memory->smalloc(nslots*sizeof(double),"angle:k3");
+  k4 = (double *) memory->smalloc(nslots*sizeof(double),"angle:k4");
+
+  // bond-bond term
+  bb_k = (double *) memory->smalloc(nslots*sizeof(double),"angle:bb_k");
+  bb_r1 = (double *) memory->smalloc(nslots*sizeof(double),"angle:bb_r1");
+  bb_r2 = (double *) memory->smalloc(nslots*sizeof(double),"angle:bb_r2");
+
+  // bond-angle term
+  ba_k1 = (double *) memory->smalloc(nslots*sizeof(double),"angle:ba_k1");
+  ba_k2 = (double *) memory->smalloc(nslots*sizeof(double),"angle:ba_k2");
+  ba_r1 = (double *) memory->smalloc(nslots*sizeof(double),"angle:ba_r1");
+  ba_r2 = (double *) memory->smalloc(nslots*sizeof(double),"angle:ba_r2");
+
+  // flags recording which coefficient sets have been supplied per type
+  setflag = (int *) memory->smalloc(nslots*sizeof(int),"angle:setflag");
+  setflag_a = (int *) memory->smalloc(nslots*sizeof(int),"angle:setflag_a");
+  setflag_bb = (int *) memory->smalloc(nslots*sizeof(int),"angle:setflag_bb");
+  setflag_ba = (int *) memory->smalloc(nslots*sizeof(int),"angle:setflag_ba");
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    setflag_ba[itype] = 0;
+    setflag_bb[itype] = 0;
+    setflag_a[itype] = 0;
+    setflag[itype] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+   which = 0 -> Angle coeffs
+   which = 1 -> BondBond coeffs
+   which = 2 -> BondAngle coeffs
+------------------------------------------------------------------------- */
+
+void AngleClass2::coeff(int which, int narg, char **arg)
+{
+  if (which < 0 || which > 2)
+    error->all("Invalid coeffs for this angle style");
+  if (!allocated) allocate();
+
+  // arg[0] is handed to force->bounds() to obtain the inclusive range
+  // of angle types (ilo..ihi) that this set of coefficients applies to
+
+  int ilo,ihi;
+  force->bounds(arg[0],atom->nangletypes,ilo,ihi);
+
+  int count = 0;
+  int itype;
+
+  if (which == 0) {
+
+    // Angle coeffs: theta0 (degrees) k2 k3 k4
+
+    if (narg != 5) error->all("Incorrect args for angle coefficients");
+
+    const double theta0_deg = atof(arg[1]);
+    const double k2_one = atof(arg[2]);
+    const double k3_one = atof(arg[3]);
+    const double k4_one = atof(arg[4]);
+
+    // convert theta0 from degrees to radians
+
+    const double theta0_rad = theta0_deg/180.0 * PI;
+
+    for (itype = ilo; itype <= ihi; itype++) {
+      theta0[itype] = theta0_rad;
+      k2[itype] = k2_one;
+      k3[itype] = k3_one;
+      k4[itype] = k4_one;
+      setflag_a[itype] = 1;
+      count++;
+    }
+
+  } else if (which == 1) {
+
+    // BondBond coeffs: k r1 r2
+
+    if (narg != 4) error->all("Incorrect args for angle coefficients");
+
+    const double k_one = atof(arg[1]);
+    const double r1_one = atof(arg[2]);
+    const double r2_one = atof(arg[3]);
+
+    for (itype = ilo; itype <= ihi; itype++) {
+      bb_k[itype] = k_one;
+      bb_r1[itype] = r1_one;
+      bb_r2[itype] = r2_one;
+      setflag_bb[itype] = 1;
+      count++;
+    }
+
+  } else if (which == 2) {
+
+    // BondAngle coeffs: k1 k2 r1 r2
+
+    if (narg != 5) error->all("Incorrect args for angle coefficients");
+
+    const double k1_one = atof(arg[1]);
+    const double k2_one = atof(arg[2]);
+    const double r1_one = atof(arg[3]);
+    const double r2_one = atof(arg[4]);
+
+    for (itype = ilo; itype <= ihi; itype++) {
+      ba_k1[itype] = k1_one;
+      ba_k2[itype] = k2_one;
+      ba_r1[itype] = r1_one;
+      ba_r2[itype] = r2_one;
+      setflag_ba[itype] = 1;
+      count++;
+    }
+  }
+
+  // an empty type range means nothing was assigned
+
+  if (count == 0) error->all("Incorrect args for angle coefficients");
+
+  // a type is fully specified only once all three coefficient sets exist
+
+  for (itype = ilo; itype <= ihi; itype++) {
+    const int complete = setflag_a[itype] == 1 && setflag_bb[itype] == 1 &&
+      setflag_ba[itype] == 1;
+    if (complete) setflag[itype] = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double AngleClass2::equilibrium_angle(int i)
+{
+  // stored equilibrium angle of angle type i
+  // (coeff() stores theta0 in radians)
+  const double theta_eq = theta0[i];
+  return theta_eq;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file
+------------------------------------------------------------------------- */
+
+void AngleClass2::write_restart(FILE *fp)
+{
+  const int ntypes = atom->nangletypes;
+
+  // coefficient arrays in the order they appear in the restart file;
+  // read_restart() must consume them in the same order
+  double *fields[] = {theta0,k2,k3,k4,
+		      bb_k,bb_r1,bb_r2,
+		      ba_k1,ba_k2,ba_r1,ba_r2};
+  const int nfields = sizeof(fields)/sizeof(fields[0]);
+
+  // entries 1..ntypes of each array are written; slot 0 is skipped
+  for (int m = 0; m < nfields; m++)
+    fwrite(&fields[m][1],sizeof(double),ntypes,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them 
+------------------------------------------------------------------------- */
+
+void AngleClass2::read_restart(FILE *fp)
+{
+  allocate();
+
+  const int ntypes = atom->nangletypes;
+
+  // coefficient arrays in restart-file order (same as write_restart());
+  // the table is built after allocate() so the pointers are valid
+  double *fields[] = {theta0,k2,k3,k4,
+		      bb_k,bb_r1,bb_r2,
+		      ba_k1,ba_k2,ba_r1,ba_r2};
+  const int nfields = sizeof(fields)/sizeof(fields[0]);
+  int m;
+
+  // only proc 0 touches the file
+  if (comm->me == 0) {
+    for (m = 0; m < nfields; m++)
+      fread(&fields[m][1],sizeof(double),ntypes,fp);
+  }
+
+  // every proc receives entries 1..ntypes of each array from proc 0
+  for (m = 0; m < nfields; m++)
+    MPI_Bcast(&fields[m][1],ntypes,MPI_DOUBLE,0,world);
+
+  // coefficients for all types are now defined
+  for (int itype = 1; itype <= ntypes; itype++) setflag[itype] = 1;
+}
--- a/src/CLASS2/angle_class2.h
+++ b/src/CLASS2/angle_class2.h
@ -0,0 +1,39 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef ANGLE_CLASS2_H
+#define ANGLE_CLASS2_H
+
+#include "stdio.h"
+#include "angle.h"
+
+// Class2 angle style: an angle term plus bond-bond and bond-angle cross
+// terms, each with its own per-type coefficient arrays.
+class AngleClass2 : public Angle {
+ public:
+  AngleClass2() {}
+  ~AngleClass2();
+  // accumulate energy (eflag), forces and virial (vflag) for all angles
+  void compute(int, int);
+  // set coefficients: which = 0 Angle, 1 BondBond, 2 BondAngle
+  void coeff(int, int, char **);
+  // stored theta0 of the given angle type
+  double equilibrium_angle(int);
+  // write / read all coefficient arrays to / from a restart file
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+
+ private:
+  // angle term: equilibrium angle and quadratic/cubic/quartic coefficients
+  double *theta0,*k2,*k3,*k4;
+  // bond-bond term: coefficient and the two reference bond lengths
+  double *bb_k,*bb_r1,*bb_r2;
+  // bond-angle term: two coefficients and two reference bond lengths
+  double *ba_k1,*ba_k2,*ba_r1,*ba_r2;
+  // per-type flags: Angle / BondBond / BondAngle coefficients were set
+  int *setflag_a,*setflag_bb,*setflag_ba;
+
+  // allocate all per-type arrays and clear the flags
+  void allocate();
+};
+
+#endif
--- a/src/CLASS2/bond_class2.cpp
+++ b/src/CLASS2/bond_class2.cpp
@ -0,0 +1,230 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Eric Simon (Cray)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "bond_class2.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+
+/* ----------------------------------------------------------------------
+   free all arrays 
+------------------------------------------------------------------------- */
+
+BondClass2::~BondClass2()
+{
+  // the arrays only exist once the allocated flag has been raised
+  if (!allocated) return;
+
+  // per-type coefficients
+  memory->sfree(r0);
+  memory->sfree(k2);
+  memory->sfree(k3);
+  memory->sfree(k4);
+
+  // per-type "coefficients have been set" flags
+  memory->sfree(setflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondClass2::compute(int eflag, int vflag)
+{
+  // Accumulates bond energy (when eflag is set), forces on both atoms and
+  // the virial (when vflag is set) for every entry of neighbor->bondlist.
+  // Per-bond energy is k2*dr^2 + k3*dr^3 + k4*dr^4 with dr = r - r0.
+
+  int n,iatom,jatom,btype,nowned;
+  double dx,dy,dz,distsq,dist,stretch,stretch2,stretch3,stretch4;
+  double dedr,fpair,weight;
+
+  energy = 0.0;
+  if (vflag) {
+    for (n = 0; n < 6; n++) virial[n] = 0.0;
+  }
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int **bondlist = neighbor->bondlist;
+  const int nbondlist = neighbor->nbondlist;
+  const int nlocal = atom->nlocal;
+  const int newton_bond = force->newton_bond;
+
+  for (n = 0; n < nbondlist; n++) {
+
+    int *bond = bondlist[n];
+    iatom = bond[0];
+    jatom = bond[1];
+    btype = bond[2];
+
+    // weight = share of this bond's energy/virial tallied here:
+    // all of it with newton_bond on, otherwise one half per atom
+    // whose index is below nlocal
+
+    if (newton_bond) nowned = 2;
+    else {
+      nowned = 0;
+      if (iatom < nlocal) nowned++;
+      if (jatom < nlocal) nowned++;
+    }
+    weight = 0.5 * nowned;
+
+    dx = x[iatom][0] - x[jatom][0];
+    dy = x[iatom][1] - x[jatom][1];
+    dz = x[iatom][2] - x[jatom][2];
+    domain->minimum_image(&dx,&dy,&dz);
+
+    distsq = dx*dx + dy*dy + dz*dz;
+    dist = sqrt(distsq);
+    stretch = dist - r0[btype];
+    stretch2 = stretch*stretch;
+    stretch3 = stretch2*stretch;
+    stretch4 = stretch3*stretch;
+
+    // force & energy
+    // fpair is -dE/dr divided by r; zero-length bonds get no force
+
+    dedr = 2.0*k2[btype]*stretch + 3.0*k3[btype]*stretch2 +
+      4.0*k4[btype]*stretch3;
+    fpair = (dist > 0.0) ? -dedr/dist : 0.0;
+
+    if (eflag) 
+      energy += weight *
+	(k2[btype]*stretch2 + k3[btype]*stretch3 + k4[btype]*stretch4);
+
+    // apply force to each of 2 atoms
+
+    if (newton_bond || iatom < nlocal) {
+      f[iatom][0] += dx*fpair;
+      f[iatom][1] += dy*fpair;
+      f[iatom][2] += dz*fpair;
+    }
+
+    if (newton_bond || jatom < nlocal) {
+      f[jatom][0] -= dx*fpair;
+      f[jatom][1] -= dy*fpair;
+      f[jatom][2] -= dz*fpair;
+    }
+
+    // virial contribution
+    // component order: xx, yy, zz, xy, xz, yz
+
+    if (vflag) {
+      virial[0] += weight*dx*dx*fpair;
+      virial[1] += weight*dy*dy*fpair;
+      virial[2] += weight*dz*dz*fpair;
+      virial[3] += weight*dx*dy*fpair;
+      virial[4] += weight*dx*dz*fpair;
+      virial[5] += weight*dy*dz*fpair;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondClass2::allocate()
+{
+  allocated = 1;
+
+  // one slot per bond type plus an extra leading slot; the code in this
+  // file only ever indexes entries 1..ntypes
+  const int ntypes = atom->nbondtypes;
+  const int nslots = ntypes + 1;
+
+  r0 = (double *) memory->smalloc(nslots*sizeof(double),"bond:r0");
+  k2 = (double *) memory->smalloc(nslots*sizeof(double),"bond:k2");
+  k3 = (double *) memory->smalloc(nslots*sizeof(double),"bond:k3");
+  k4 = (double *) memory->smalloc(nslots*sizeof(double),"bond:k4");
+
+  // no type has coefficients yet
+  setflag = (int *) memory->smalloc(nslots*sizeof(int),"bond:setflag");
+  for (int itype = 1; itype <= ntypes; itype++) {
+    setflag[itype] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs from one line in input script or data file 
+------------------------------------------------------------------------- */
+
+void BondClass2::coeff(int narg, char **arg)
+{
+  // expected arguments: type-range r0 k2 k3 k4
+
+  if (narg != 5) error->all("Incorrect args for bond coefficients");
+  if (!allocated) allocate();
+
+  // arg[0] is handed to force->bounds() to obtain the inclusive range
+  // of bond types (ilo..ihi) that these coefficients apply to
+
+  int ilo,ihi;
+  force->bounds(arg[0],atom->nbondtypes,ilo,ihi);
+
+  const double r0_one = atof(arg[1]);
+  const double k2_one = atof(arg[2]);
+  const double k3_one = atof(arg[3]);
+  const double k4_one = atof(arg[4]);
+
+  int count = 0;
+  int itype = ilo;
+  while (itype <= ihi) {
+    r0[itype] = r0_one;
+    k2[itype] = k2_one;
+    k3[itype] = k3_one;
+    k4[itype] = k4_one;
+    setflag[itype] = 1;
+    count++;
+    itype++;
+  }
+
+  // an empty type range means nothing was assigned
+
+  if (count == 0) error->all("Incorrect args for bond coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   return an equilibrium bond length 
+------------------------------------------------------------------------- */
+
+double BondClass2::equilibrium_distance(int i)
+{
+  // stored r0 of bond type i
+  const double length_eq = r0[i];
+  return length_eq;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file 
+------------------------------------------------------------------------- */
+
+void BondClass2::write_restart(FILE *fp)
+{
+  const int ntypes = atom->nbondtypes;
+
+  // coefficient arrays in the order they appear in the restart file;
+  // read_restart() must consume them in the same order
+  double *fields[] = {r0,k2,k3,k4};
+  const int nfields = sizeof(fields)/sizeof(fields[0]);
+
+  // entries 1..ntypes of each array are written; slot 0 is skipped
+  for (int m = 0; m < nfields; m++)
+    fwrite(&fields[m][1],sizeof(double),ntypes,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them 
+------------------------------------------------------------------------- */
+
+void BondClass2::read_restart(FILE *fp)
+{
+  allocate();
+
+  const int ntypes = atom->nbondtypes;
+
+  // coefficient arrays in restart-file order (same as write_restart());
+  // the table is built after allocate() so the pointers are valid
+  double *fields[] = {r0,k2,k3,k4};
+  const int nfields = sizeof(fields)/sizeof(fields[0]);
+  int m;
+
+  // only proc 0 touches the file
+  if (comm->me == 0) {
+    for (m = 0; m < nfields; m++)
+      fread(&fields[m][1],sizeof(double),ntypes,fp);
+  }
+
+  // every proc receives entries 1..ntypes of each array from proc 0
+  for (m = 0; m < nfields; m++)
+    MPI_Bcast(&fields[m][1],ntypes,MPI_DOUBLE,0,world);
+
+  // coefficients for all types are now defined
+  for (int itype = 1; itype <= ntypes; itype++) setflag[itype] = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondClass2::single(int type, double rsq, int i, int j, double rfactor,
+			int eflag, double &fforce, double &eng)
+{
+  // Force and energy of one bond of the given type at squared length rsq.
+  // fforce receives -dE/dr divided by r; eng is only written when eflag
+  // is set, and is scaled by rfactor.  i and j are not used here.
+
+  const double dist = sqrt(rsq);
+  const double stretch = dist - r0[type];
+  const double stretch2 = stretch*stretch;
+  const double stretch3 = stretch2*stretch;
+  const double stretch4 = stretch3*stretch;
+
+  // force & energy
+
+  const double dedr = 2.0*k2[type]*stretch + 3.0*k3[type]*stretch2 +
+    4.0*k4[type]*stretch3;
+
+  // a zero-length bond has no direction, so no force is applied
+  fforce = (dist > 0.0) ? -dedr/dist : 0.0;
+
+  if (eflag) {
+    eng = rfactor *
+      (k2[type]*stretch2 + k3[type]*stretch3 + k4[type]*stretch4);
+  }
+}
--- a/src/CLASS2/bond_class2.h
+++ b/src/CLASS2/bond_class2.h
@ -0,0 +1,37 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef BOND_CLASS2_H
+#define BOND_CLASS2_H
+
+#include "stdio.h"
+#include "bond.h"
+
+// Class2 bond style: quartic polynomial in the bond stretch (r - r0).
+class BondClass2 : public Bond {
+ public:
+  BondClass2() {}
+  ~BondClass2();
+  // accumulate energy (eflag), forces and virial (vflag) for all bonds
+  void compute(int, int);
+  // set r0, k2, k3, k4 for a range of bond types
+  void coeff(int, char **);
+  // stored r0 of the given bond type
+  double equilibrium_distance(int);
+  // write / read all coefficient arrays to / from a restart file
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  // force and energy of a single bond given its type and squared length
+  void single(int, double, int, int, double, int, double &, double &);
+
+ private:
+  // per-type equilibrium length and quadratic/cubic/quartic coefficients
+  double *r0,*k2,*k3,*k4;
+
+  // allocate the per-type arrays and clear setflag
+  void allocate();
+};
+
+#endif
--- a/src/CLASS2/dihedral_class2.cpp
+++ b/src/CLASS2/dihedral_class2.cpp
@ -0,0 +1,977 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Eric Simon (Cray)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "dihedral_class2.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "update.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+
+// The outer parentheses keep the whole conditional intact when a macro is
+// used inside a larger expression, e.g. 2*MAX(a,b) or x - MIN(a,b).
+// NOTE: each argument may be evaluated twice; avoid side effects.
+#define MIN(A,B) (((A) < (B)) ? (A) : (B))
+#define MAX(A,B) (((A) > (B)) ? (A) : (B))
+
+#define TOLERANCE 0.05
+#define SMALL     0.0000001
+
+/* ----------------------------------------------------------------------
+   set all global defaults 
+------------------------------------------------------------------------- */
+
+DihedralClass2::DihedralClass2()
+{
+  // atan(1) is pi/4, so four times it gives pi
+  const double quarter_pi = atan(1.0);
+  PI = 4.0*quarter_pi;
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays 
+------------------------------------------------------------------------- */
+
+DihedralClass2::~DihedralClass2()
+{
+  // the arrays only exist once the allocated flag has been raised
+  if (!allocated) return;
+
+  // dihedral term
+  memory->sfree(k1);
+  memory->sfree(k2);
+  memory->sfree(k3);
+  memory->sfree(phi1);
+  memory->sfree(phi2);
+  memory->sfree(phi3);
+
+  // mbt_* arrays
+  memory->sfree(mbt_f1);
+  memory->sfree(mbt_f2);
+  memory->sfree(mbt_f3);
+  memory->sfree(mbt_r0);
+
+  // ebt_* arrays, one set per end (_1 and _2)
+  memory->sfree(ebt_f1_1);
+  memory->sfree(ebt_f2_1);
+  memory->sfree(ebt_f3_1);
+  memory->sfree(ebt_r0_1);
+  memory->sfree(ebt_f1_2);
+  memory->sfree(ebt_f2_2);
+  memory->sfree(ebt_f3_2);
+  memory->sfree(ebt_r0_2);
+
+  // at_* arrays, one set per angle (_1 and _2)
+  memory->sfree(at_f1_1);
+  memory->sfree(at_f2_1);
+  memory->sfree(at_f3_1);
+  memory->sfree(at_theta0_1);
+  memory->sfree(at_f1_2);
+  memory->sfree(at_f2_2);
+  memory->sfree(at_f3_2);
+  memory->sfree(at_theta0_2);
+
+  // aat_* arrays
+  memory->sfree(aat_k);
+  memory->sfree(aat_theta0_1);
+  memory->sfree(aat_theta0_2);
+
+  // bb13t_* arrays
+  memory->sfree(bb13t_k);
+  memory->sfree(bb13t_r10);
+  memory->sfree(bb13t_r30);
+
+  // per-type "coefficients have been set" flags
+  memory->sfree(setflag_d);
+  memory->sfree(setflag_mbt);
+  memory->sfree(setflag_ebt);
+  memory->sfree(setflag_at);
+  memory->sfree(setflag_aat);
+  memory->sfree(setflag_bb13t);
+  memory->sfree(setflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralClass2::compute(int eflag, int vflag)
+{
+  int i1,i2,i3,i4,i,j,k,n,type,factor;
+  double rfactor;
+  double delx1,dely1,delz1,dely2,delz2,delx2m,dely2m,delz2m;
+  double delx2,dely3,delz3,r1mag2,r1,r2mag2,r2,r3mag2,r3;
+  double sb1,rb1,sb2,rb2,sb3,rb3,c0,r12c1;
+  double r12c2,costh12,costh13,costh23,sc1,sc2,s1,s2,c;
+  double cosphi,phi,sinphi,a11,a22,a33,a12,a13,a23,sx1,sx2;
+  double sx12,sy1,sy2,sy12,sz1,sz2,sz12,dphi1,dphi2,dphi3;
+  double de_dihedral,t1,t2,t3,t4,cos2phi,cos3phi,bt1,bt2;
+  double bt3,sumbte,db,sumbtf,at1,at2,at3,da,da1,da2,r1_0;
+  double r3_0,dr1,dr2,tk1,tk2,vx1,vx2,vx3,vy1,vy2,vy3,vz1;
+  double vz2,vz3,delx3,s12,sin2;
+  double dcosphidr[4][3],dphidr[4][3],dbonddr[3][4][3],dthetadr[2][4][3];
+  double fabcd[4][3];
+
+  energy = 0.0;
+  if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int **dihedrallist = neighbor->dihedrallist;
+  int ndihedrallist = neighbor->ndihedrallist;
+  int nlocal = atom->nlocal;
+  int newton_bond = force->newton_bond;
+
+  for (n = 0; n < ndihedrallist; n++) {
+
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    if (newton_bond) factor = 4;
+    else {
+      factor = 0;
+      if (i1 < nlocal) factor++;
+      if (i2 < nlocal) factor++;
+      if (i3 < nlocal) factor++;
+      if (i4 < nlocal) factor++;
+      }
+    rfactor = 0.25 * factor;
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(&delx1,&dely1,&delz1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(&delx2,&dely2,&delz2);
+
+    delx2m = -delx2;
+    dely2m = -dely2;
+    delz2m = -delz2;
+    domain->minimum_image(&delx2m,&dely2m,&delz2m);
+
+    // 3rd bond
+
+    delx3 = x[i4][0] - x[i3][0];
+    dely3 = x[i4][1] - x[i3][1];
+    delz3 = x[i4][2] - x[i3][2];
+    domain->minimum_image(&delx3,&dely3,&delz3);
+
+    // distances
+
+    r1mag2 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(r1mag2);
+    r2mag2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(r2mag2);
+    r3mag2 = delx3*delx3 + dely3*dely3 + delz3*delz3;
+    r3 = sqrt(r3mag2);
+
+    sb1 = 1.0/r1mag2;
+    rb1 = 1.0/r1;
+          
+    sb2 = 1.0/r2mag2;
+    rb2 = 1.0/r2;
+
+    sb3 = 1.0/r3mag2;
+    rb3 = 1.0/r3;
+
+    c0 = (delx1*delx3 + dely1*dely3 + delz1*delz3) * rb1*rb3;
+
+    // angles
+
+    r12c1 = rb1*rb2;
+    r12c2 = rb2*rb3;
+    costh12 = (delx1*delx2 + dely1*dely2 + delz1*delz2) * r12c1;
+    costh13 = c0;
+    costh23 = (delx2m*delx3 + dely2m*dely3 + delz2m*delz3) * r12c2;
+          
+    // cos and sin of 2 angles and final c
+
+    sin2 = MAX(1.0 - costh12*costh12,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+          
+    sin2 = MAX(1.0 - costh23*costh23,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+          
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + costh12*costh23) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me;
+      MPI_Comm_rank(world,&me);
+      if (screen) {
+	fprintf(screen,"Dihedral problem: %d %d %d %d %d %d\n",
+		me,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+    cosphi = c;
+    phi = acos(c);
+
+    sinphi = sqrt(1.0 - c*c);
+    sinphi = MAX(sinphi,SMALL);
+
+    a11 = -c*sb1*s1;
+    a22 = sb2 * (2.0*costh13*s12 - c*(s1+s2));
+    a33 = -c*sb3*s2;
+    a12 = r12c1 * (costh12*c*s1 + costh23*s12);
+    a13 = rb1*rb3*s12;
+    a23 = r12c2 * (-costh23*c*s2 - costh12*s12);
+          
+    sx1  = a11*delx1 + a12*delx2 + a13*delx3;
+    sx2  = a12*delx1 + a22*delx2 + a23*delx3;
+    sx12 = a13*delx1 + a23*delx2 + a33*delx3;
+    sy1  = a11*dely1 + a12*dely2 + a13*dely3;
+    sy2  = a12*dely1 + a22*dely2 + a23*dely3;
+    sy12 = a13*dely1 + a23*dely2 + a33*dely3;
+    sz1  = a11*delz1 + a12*delz2 + a13*delz3;
+    sz2  = a12*delz1 + a22*delz2 + a23*delz3;
+    sz12 = a13*delz1 + a23*delz2 + a33*delz3;
+
+    // set up d(cos(phi))/d(r) and dphi/dr arrays
+
+    dcosphidr[0][0] = -sx1;
+    dcosphidr[0][1] = -sy1;
+    dcosphidr[0][2] = -sz1;
+    dcosphidr[1][0] = sx2 + sx1;
+    dcosphidr[1][1] = sy2 + sy1;
+    dcosphidr[1][2] = sz2 + sz1;
+    dcosphidr[2][0] = sx12 - sx2;
+    dcosphidr[2][1] = sy12 - sy2;
+    dcosphidr[2][2] = sz12 - sz2;
+    dcosphidr[3][0] = -sx12;
+    dcosphidr[3][1] = -sy12;
+    dcosphidr[3][2] = -sz12;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	dphidr[i][j] = -dcosphidr[i][j] / sinphi;
+
+    // energy
+
+    dphi1 = phi - phi1[type];
+    dphi2 = 2.0*phi - phi2[type];
+    dphi3 = 3.0*phi - phi3[type];
+    
+    if (eflag) energy += rfactor * (k1[type]*(1.0 - cos(dphi1)) +
+				    k2[type]*(1.0 - cos(dphi2)) +
+				    k3[type]*(1.0 - cos(dphi3)));
+          
+    de_dihedral = k1[type]*sin(dphi1) + 2.0*k2[type]*sin(dphi2) +
+      3.0*k3[type]*sin(dphi3);
+
+    // torsion forces on all 4 atoms
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = de_dihedral*dphidr[i][j];
+
+    // set up d(bond)/d(r) array
+    // dbonddr(i,j,k) = bond i, atom j, coordinate k
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dbonddr[i][j][k] = 0.0;
+    
+    // bond1
+    
+    dbonddr[0][0][0] = delx1 / r1;
+    dbonddr[0][0][1] = dely1 / r1;
+    dbonddr[0][0][2] = delz1 / r1;
+    dbonddr[0][1][0] = -delx1 / r1;
+    dbonddr[0][1][1] = -dely1 / r1;
+    dbonddr[0][1][2] = -delz1 / r1;
+
+    // bond2
+
+    dbonddr[1][1][0] = delx2 / r2;
+    dbonddr[1][1][1] = dely2 / r2;
+    dbonddr[1][1][2] = delz2 / r2;
+    dbonddr[1][2][0] = -delx2 / r2;
+    dbonddr[1][2][1] = -dely2 / r2;
+    dbonddr[1][2][2] = -delz2 / r2;
+
+    // bond3
+    
+    dbonddr[2][2][0] = delx3 / r3;
+    dbonddr[2][2][1] = dely3 / r3;
+    dbonddr[2][2][2] = delz3 / r3;
+    dbonddr[2][3][0] = -delx3 / r3;
+    dbonddr[2][3][1] = -dely3 / r3;
+    dbonddr[2][3][2] = -delz3 / r3;
+
+    // set up d(theta)/d(r) array
+    // dthetadr(i,j,k) = angle i, atom j, coordinate k
+
+    for (i = 0; i < 2; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dthetadr[i][j][k] = 0.0;
+    
+    t1 = costh12 / r1mag2;
+    t2 = costh23 / r2mag2;
+    t3 = costh12 / r2mag2;
+    t4 = costh23 / r3mag2;
+    
+    // angle12
+    
+    dthetadr[0][0][0] = sc1 * ((t1 * delx1) - (delx2 * r12c1));
+    dthetadr[0][0][1] = sc1 * ((t1 * dely1) - (dely2 * r12c1));
+    dthetadr[0][0][2] = sc1 * ((t1 * delz1) - (delz2 * r12c1));
+    
+    dthetadr[0][1][0] = sc1 * ((-t1 * delx1) + (delx2 * r12c1) +
+			       (-t3 * delx2) + (delx1 * r12c1));
+    dthetadr[0][1][1] = sc1 * ((-t1 * dely1) + (dely2 * r12c1) +
+			       (-t3 * dely2) + (dely1 * r12c1));
+    dthetadr[0][1][2] = sc1 * ((-t1 * delz1) + (delz2 * r12c1) +
+			       (-t3 * delz2) + (delz1 * r12c1));
+    
+    dthetadr[0][2][0] = sc1 * ((t3 * delx2) - (delx1 * r12c1)); 
+    dthetadr[0][2][1] = sc1 * ((t3 * dely2) - (dely1 * r12c1));
+    dthetadr[0][2][2] = sc1 * ((t3 * delz2) - (delz1 * r12c1));
+    
+    // angle23
+    
+    dthetadr[1][1][0] = sc2 * ((t2 * delx2) + (delx3 * r12c2));
+    dthetadr[1][1][1] = sc2 * ((t2 * dely2) + (dely3 * r12c2));
+    dthetadr[1][1][2] = sc2 * ((t2 * delz2) + (delz3 * r12c2));
+    
+    dthetadr[1][2][0] = sc2 * ((-t2 * delx2) - (delx3 * r12c2) +
+			       (t4 * delx3) + (delx2 * r12c2));
+    dthetadr[1][2][1] = sc2 * ((-t2 * dely2) - (dely3 * r12c2) +
+			       (t4 * dely3) + (dely2 * r12c2));
+    dthetadr[1][2][2] = sc2 * ((-t2 * delz2) - (delz3 * r12c2) +
+			       (t4 * delz3) + (delz2 * r12c2));
+    
+    dthetadr[1][3][0] = -sc2 * ((t4 * delx3) + (delx2 * r12c2));
+    dthetadr[1][3][1] = -sc2 * ((t4 * dely3) + (dely2 * r12c2));
+    dthetadr[1][3][2] = -sc2 * ((t4 * delz3) + (delz2 * r12c2));
+    
+    // mid-bond/torsion coupling
+    // energy on bond2 (middle bond)
+    
+    cos2phi = cos(2.0*phi);
+    cos3phi = cos(3.0*phi);
+    
+    bt1 = mbt_f1[type] * cosphi;
+    bt2 = mbt_f2[type] * cos2phi;
+    bt3 = mbt_f3[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+    db = r2 - mbt_r0[type];
+    if (eflag) energy += rfactor * db * sumbte;
+    
+    // force on bond2
+    
+    bt1 = -mbt_f1[type] * sinphi;
+    bt2 = -2.0 * mbt_f2[type] * sin(2.0*phi);
+    bt3 = -3.0 * mbt_f3[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+    
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[1][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond1 (first bond)
+
+    bt1 = ebt_f1_1[type] * cosphi;
+    bt2 = ebt_f2_1[type] * cos2phi;
+    bt3 = ebt_f3_1[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r1 - ebt_r0_1[type];
+    if (eflag) energy += rfactor * db * (bt1+bt2+bt3);
+
+    // force on bond1
+
+    bt1 = ebt_f1_1[type] * sinphi;
+    bt2 = 2.0 * ebt_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * ebt_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= db*sumbtf*dphidr[i][j] + sumbte*dbonddr[0][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond3 (last bond)
+
+    bt1 = ebt_f1_2[type] * cosphi;
+    bt2 = ebt_f2_2[type] * cos2phi;
+    bt3 = ebt_f3_2[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r3 - ebt_r0_2[type];
+    if (eflag) energy += rfactor * db * (bt1+bt2+bt3);
+
+    // force on bond3
+
+    bt1 = -ebt_f1_2[type] * sinphi;
+    bt2 = -2.0 * ebt_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * ebt_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[2][i][j];
+
+    // angle/torsion coupling
+    // energy on angle1
+
+    at1 = at_f1_1[type] * cosphi;
+    at2 = at_f2_1[type] * cos2phi;
+    at3 = at_f3_1[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh12) - at_theta0_1[type];
+    if (eflag) energy += rfactor * da * (at1+at2+at3);
+
+    // force on angle1
+
+    bt1 = at_f1_1[type] * sinphi;
+    bt2 = 2.0 * at_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * at_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= da*sumbtf*dphidr[i][j] + sumbte*dthetadr[0][i][j];
+
+    // energy on angle2
+
+    at1 = at_f1_2[type] * cosphi;
+    at2 = at_f2_2[type] * cos2phi;
+    at3 = at_f3_2[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh23) - at_theta0_2[type];
+    if (eflag) energy += rfactor *da * (at1+at2+at3);
+
+    // force on angle2
+
+    bt1 = -at_f1_2[type] * sinphi;
+    bt2 = -2.0 * at_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * at_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += da*sumbtf*dphidr[i][j] + sumbte*dthetadr[1][i][j];
+
+    // angle/angle/torsion coupling
+
+    da1 = acos(costh12) - aat_theta0_1[type];
+    da2 = acos(costh23) - aat_theta0_2[type];
+          
+    // energy
+
+    if (eflag) energy += rfactor * aat_k[type]*da1*da2*cosphi;
+
+    // force
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= aat_k[type] * 
+	  (cosphi * (da2*dthetadr[0][i][j] - da1*dthetadr[1][i][j]) +
+	   sinphi * da1*da2*dphidr[i][j]);
+
+    // bond1/bond3 coupling
+
+    if (fabs(bb13t_k[type]) > SMALL) {
+
+      r1_0 = bb13t_r10[type];
+      r3_0 = bb13t_r30[type];
+      dr1 = r1 - r1_0;
+      dr2 = r3 - r3_0;
+      tk1 = -bb13t_k[type] * dr1 / r3;
+      tk2 = -bb13t_k[type] * dr2 / r1;
+
+      if (eflag) energy += rfactor * bb13t_k[type]*dr1*dr2;
+        
+      fabcd[0][0] += tk2 * delx1;
+      fabcd[0][1] += tk2 * dely1;
+      fabcd[0][2] += tk2 * delz1;
+
+      fabcd[1][0] -= tk2 * delx1;
+      fabcd[1][1] -= tk2 * dely1;
+      fabcd[1][2] -= tk2 * delz1;
+        
+      fabcd[2][0] -= tk1 * delx3;
+      fabcd[2][1] -= tk1 * dely3;
+      fabcd[2][2] -= tk1 * delz3;
+
+      fabcd[3][0] += tk1 * delx3;
+      fabcd[3][1] += tk1 * dely3;
+      fabcd[3][2] += tk1 * delz3;
+    }
+
+    // apply force to each of 4 atoms
+
+    if (newton_bond || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (newton_bond || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (newton_bond || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (newton_bond || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    // virial contribution
+
+    if (vflag) {
+      vx1 = fabcd[0][0];
+      vx2 = fabcd[2][0] + fabcd[3][0];
+      vx3 = fabcd[3][0];
+
+      vy1 = fabcd[0][1];
+      vy2 = fabcd[2][1] + fabcd[3][1];
+      vy3 = fabcd[3][1];
+
+      vz1 = fabcd[0][2];
+      vz2 = fabcd[2][2] + fabcd[3][2];
+      vz3 = fabcd[3][2];
+
+      virial[0] += rfactor * (delx1*vx1 + delx2*vx2 + delx3*vx3);
+      virial[1] += rfactor * (dely1*vy1 + dely2*vy2 + dely3*vy3);
+      virial[2] += rfactor * (delz1*vz1 + delz2*vz2 + delz3*vz3);
+      virial[3] += rfactor * (delx1*vy1 + delx2*vy2 + delx3*vy3);
+      virial[4] += rfactor * (delx1*vz1 + delx2*vz2 + delx3*vz3);
+      virial[5] += rfactor * (dely1*vz1 + dely2*vz2 + dely3*vz3);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate every per-type coefficient array and every setflag array
+   each array gets ndihedraltypes+1 entries so it can be indexed by type
+   flag entries 1..ndihedraltypes are cleared, entry 0 is left untouched
+------------------------------------------------------------------------- */
+
+void DihedralClass2::allocate()
+{
+  allocated = 1;
+
+  const int ntypes = atom->ndihedraltypes;
+
+  // byte counts shared by all double arrays and all int arrays
+
+  const size_t dsize = (ntypes+1)*sizeof(double);
+  const size_t isize = (ntypes+1)*sizeof(int);
+
+  // Dihedral coeffs
+
+  k1 = (double *) memory->smalloc(dsize,"dihedral:k1");
+  k2 = (double *) memory->smalloc(dsize,"dihedral:k2");
+  k3 = (double *) memory->smalloc(dsize,"dihedral:k3");
+  phi1 = (double *) memory->smalloc(dsize,"dihedral:phi1");
+  phi2 = (double *) memory->smalloc(dsize,"dihedral:phi2");
+  phi3 = (double *) memory->smalloc(dsize,"dihedral:phi3");
+
+  // MiddleBondTorsion coeffs
+
+  mbt_f1 = (double *) memory->smalloc(dsize,"dihedral:mbt_f1");
+  mbt_f2 = (double *) memory->smalloc(dsize,"dihedral:mbt_f2");
+  mbt_f3 = (double *) memory->smalloc(dsize,"dihedral:mbt_f3");
+  mbt_r0 = (double *) memory->smalloc(dsize,"dihedral:mbt_r0");
+
+  // EndBondTorsion coeffs
+
+  ebt_f1_1 = (double *) memory->smalloc(dsize,"dihedral:ebt_f1_1");
+  ebt_f2_1 = (double *) memory->smalloc(dsize,"dihedral:ebt_f2_1");
+  ebt_f3_1 = (double *) memory->smalloc(dsize,"dihedral:ebt_f3_1");
+  ebt_r0_1 = (double *) memory->smalloc(dsize,"dihedral:ebt_r0_1");
+
+  ebt_f1_2 = (double *) memory->smalloc(dsize,"dihedral:ebt_f1_2");
+  ebt_f2_2 = (double *) memory->smalloc(dsize,"dihedral:ebt_f2_2");
+  ebt_f3_2 = (double *) memory->smalloc(dsize,"dihedral:ebt_f3_2");
+  ebt_r0_2 = (double *) memory->smalloc(dsize,"dihedral:ebt_r0_2");
+
+  // AngleTorsion coeffs
+
+  at_f1_1 = (double *) memory->smalloc(dsize,"dihedral:at_f1_1");
+  at_f2_1 = (double *) memory->smalloc(dsize,"dihedral:at_f2_1");
+  at_f3_1 = (double *) memory->smalloc(dsize,"dihedral:at_f3_1");
+  at_theta0_1 = (double *) memory->smalloc(dsize,"dihedral:at_theta0_1");
+
+  at_f1_2 = (double *) memory->smalloc(dsize,"dihedral:at_f1_2");
+  at_f2_2 = (double *) memory->smalloc(dsize,"dihedral:at_f2_2");
+  at_f3_2 = (double *) memory->smalloc(dsize,"dihedral:at_f3_2");
+  at_theta0_2 = (double *) memory->smalloc(dsize,"dihedral:at_theta0_2");
+
+  // AngleAngleTorsion coeffs
+
+  aat_k = (double *) memory->smalloc(dsize,"dihedral:aat_k");
+  aat_theta0_1 = (double *) memory->smalloc(dsize,"dihedral:aat_theta0_1");
+  aat_theta0_2 = (double *) memory->smalloc(dsize,"dihedral:aat_theta0_2");
+
+  // BondBond13Torsion coeffs
+
+  bb13t_k = (double *) memory->smalloc(dsize,"dihedral:bb13t_k");
+  bb13t_r10 = (double *) memory->smalloc(dsize,"dihedral:bb13t_r10");
+  bb13t_r30 = (double *) memory->smalloc(dsize,"dihedral:bb13t_r30");
+
+  // overall flag plus one flag per coefficient section
+
+  setflag = (int *) memory->smalloc(isize,"dihedral:setflag");
+  setflag_d = (int *) memory->smalloc(isize,"dihedral:setflag_d");
+  setflag_mbt = (int *) memory->smalloc(isize,"dihedral:setflag_mbt");
+  setflag_ebt = (int *) memory->smalloc(isize,"dihedral:setflag_ebt");
+  setflag_at = (int *) memory->smalloc(isize,"dihedral:setflag_at");
+  setflag_aat = (int *) memory->smalloc(isize,"dihedral:setflag_aat");
+  setflag_bb13t = (int *) memory->smalloc(isize,"dihedral:setflag_bb13t");
+
+  // no section has been set yet for any type
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    setflag[itype] = 0;
+    setflag_d[itype] = 0;
+    setflag_mbt[itype] = 0;
+    setflag_ebt[itype] = 0;
+    setflag_at[itype] = 0;
+    setflag_aat[itype] = 0;
+    setflag_bb13t[itype] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+   which = 0 -> Dihedral coeffs
+   which = 1 -> MiddleBondTorsion coeffs
+   which = 2 -> EndBondTorsion coeffs
+   which = 3 -> AngleTorsion coeffs
+   which = 4 -> AngleAngleTorsion coeffs
+   which = 5 -> BondBond13Torsion coeffs
+   arg[0] selects the type(s): force->bounds() converts it into the
+     inclusive range ilo..ihi, the remaining args are the coefficients
+   required narg (including arg[0]) per section: 7, 5, 9, 9, 4, 4
+   phi's and theta0's are given in degrees and stored in radians
+   setflag[i] is set to 1 once all 6 sections have been set for type i
+   any invalid which or narg is reported through error->all()
+------------------------------------------------------------------------- */
+
+void DihedralClass2::coeff(int which, int narg, char **arg)
+{
+  if (which < 0 || which > 5)
+    error->all("Invalid coeffs for this dihedral style");
+
+  // arg[0] is read by force->bounds() below, so it must exist
+
+  if (narg < 1) error->all("Incorrect args for dihedral coefficients");
+
+  if (!allocated) allocate();
+
+  int ilo,ihi;
+  force->bounds(arg[0],atom->ndihedraltypes,ilo,ihi);
+
+  // count = number of types actually assigned by this call
+
+  int count = 0;
+
+  if (which == 0) {
+    if (narg != 7) error->all("Incorrect args for dihedral coefficients");
+
+    double k1_one = atof(arg[1]);
+    double phi1_one = atof(arg[2]);
+    double k2_one = atof(arg[3]);
+    double phi2_one = atof(arg[4]);
+    double k3_one = atof(arg[5]);
+    double phi3_one = atof(arg[6]);
+    
+    // convert phi's from degrees to radians
+
+    for (int i = ilo; i <= ihi; i++) {
+      k1[i] = k1_one;
+      phi1[i] = phi1_one/180.0 * PI;
+      k2[i] = k2_one;
+      phi2[i] = phi2_one/180.0 * PI;
+      k3[i] = k3_one;
+      phi3[i] = phi3_one/180.0 * PI;
+      setflag_d[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 1) {
+    if (narg != 5) error->all("Incorrect args for dihedral coefficients");
+
+    double f1_one = atof(arg[1]);
+    double f2_one = atof(arg[2]);
+    double f3_one = atof(arg[3]);
+    double r0_one = atof(arg[4]);
+    
+    for (int i = ilo; i <= ihi; i++) {
+      mbt_f1[i] = f1_one;
+      mbt_f2[i] = f2_one;
+      mbt_f3[i] = f3_one;
+      mbt_r0[i] = r0_one;
+      setflag_mbt[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 2) {
+    if (narg != 9) error->all("Incorrect args for dihedral coefficients");
+
+    // args 1-3 fill the _1 arrays, args 4-6 the _2 arrays, args 7-8 the r0's
+
+    double f1_1_one = atof(arg[1]);
+    double f2_1_one = atof(arg[2]);
+    double f3_1_one = atof(arg[3]);
+    double f1_2_one = atof(arg[4]);
+    double f2_2_one = atof(arg[5]);
+    double f3_2_one = atof(arg[6]);
+    double r0_1_one = atof(arg[7]);
+    double r0_2_one = atof(arg[8]);
+    
+    for (int i = ilo; i <= ihi; i++) {
+      ebt_f1_1[i] = f1_1_one;
+      ebt_f2_1[i] = f2_1_one;
+      ebt_f3_1[i] = f3_1_one;
+      ebt_f1_2[i] = f1_2_one;
+      ebt_f2_2[i] = f2_2_one;
+      ebt_f3_2[i] = f3_2_one;
+      ebt_r0_1[i] = r0_1_one;
+      ebt_r0_2[i] = r0_2_one;
+      setflag_ebt[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 3) {
+    if (narg != 9) error->all("Incorrect args for dihedral coefficients");
+
+    // args 1-3 fill the _1 arrays, args 4-6 the _2 arrays, args 7-8 the theta0's
+
+    double f1_1_one = atof(arg[1]);
+    double f2_1_one = atof(arg[2]);
+    double f3_1_one = atof(arg[3]);
+    double f1_2_one = atof(arg[4]);
+    double f2_2_one = atof(arg[5]);
+    double f3_2_one = atof(arg[6]);
+    double theta0_1_one = atof(arg[7]);
+    double theta0_2_one = atof(arg[8]);
+
+    // convert theta0's from degrees to radians
+    
+    for (int i = ilo; i <= ihi; i++) {
+      at_f1_1[i] = f1_1_one;
+      at_f2_1[i] = f2_1_one;
+      at_f3_1[i] = f3_1_one;
+      at_f1_2[i] = f1_2_one;
+      at_f2_2[i] = f2_2_one;
+      at_f3_2[i] = f3_2_one;
+      at_theta0_1[i] = theta0_1_one/180.0 * PI;
+      at_theta0_2[i] = theta0_2_one/180.0 * PI;
+      setflag_at[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 4) {
+    if (narg != 4) error->all("Incorrect args for dihedral coefficients");
+
+    double k_one = atof(arg[1]);
+    double theta0_1_one = atof(arg[2]);
+    double theta0_2_one = atof(arg[3]);
+
+    // convert theta0's from degrees to radians
+    
+    for (int i = ilo; i <= ihi; i++) {
+      aat_k[i] = k_one;
+      aat_theta0_1[i] = theta0_1_one/180.0 * PI;
+      aat_theta0_2[i] = theta0_2_one/180.0 * PI;
+      setflag_aat[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 5) {
+    if (narg != 4) error->all("Incorrect args for dihedral coefficients");
+
+    double k_one = atof(arg[1]);
+    double r10_one = atof(arg[2]);
+    double r30_one = atof(arg[3]);
+    
+    for (int i = ilo; i <= ihi; i++) {
+      bb13t_k[i] = k_one;
+      bb13t_r10[i] = r10_one;
+      bb13t_r30[i] = r30_one;
+      setflag_bb13t[i] = 1;
+      count++;
+    }
+  }
+
+  // an empty ilo..ihi range means no type was assigned
+
+  if (count == 0) error->all("Incorrect args for dihedral coefficients");
+
+  // a type is fully specified only when every section has been set
+
+  for (int i = ilo; i <= ihi; i++)
+    if (setflag_d[i] == 1 && setflag_mbt[i] == 1 && setflag_ebt[i] == 1 &&
+        setflag_at[i] == 1 && setflag_aat[i] == 1 && setflag_bb13t[i] == 1)
+      setflag[i] = 1;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file 
+   each array is written as ndihedraltypes doubles starting at entry 1
+   the array order used here is the order read_restart() reads them in
+------------------------------------------------------------------------- */
+
+void DihedralClass2::write_restart(FILE *fp)
+{
+  const int ntypes = atom->ndihedraltypes;
+
+  // coefficient arrays in file order
+
+  double *coeffs[] = {
+    k1,k2,k3,phi1,phi2,phi3,
+    mbt_f1,mbt_f2,mbt_f3,mbt_r0,
+    ebt_f1_1,ebt_f2_1,ebt_f3_1,ebt_r0_1,
+    ebt_f1_2,ebt_f2_2,ebt_f3_2,ebt_r0_2,
+    at_f1_1,at_f2_1,at_f3_1,at_theta0_1,
+    at_f1_2,at_f2_2,at_f3_2,at_theta0_2,
+    aat_k,aat_theta0_1,aat_theta0_2,
+    bb13t_k,bb13t_r10,bb13t_r30
+  };
+  const int ncoeffs = sizeof(coeffs)/sizeof(coeffs[0]);
+
+  // skip entry 0 of every array, types are indexed from 1
+
+  for (int m = 0; m < ncoeffs; m++)
+    fwrite(coeffs[m]+1,sizeof(double),ntypes,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them 
+   arrays are allocated first, then filled in the same order that
+   write_restart() writes them; every type is marked as set afterwards
+------------------------------------------------------------------------- */
+
+void DihedralClass2::read_restart(FILE *fp)
+{
+  allocate();
+
+  const int ntypes = atom->ndihedraltypes;
+
+  // coefficient arrays in file order, valid only after allocate()
+
+  double *coeffs[] = {
+    k1,k2,k3,phi1,phi2,phi3,
+    mbt_f1,mbt_f2,mbt_f3,mbt_r0,
+    ebt_f1_1,ebt_f2_1,ebt_f3_1,ebt_r0_1,
+    ebt_f1_2,ebt_f2_2,ebt_f3_2,ebt_r0_2,
+    at_f1_1,at_f2_1,at_f3_1,at_theta0_1,
+    at_f1_2,at_f2_2,at_f3_2,at_theta0_2,
+    aat_k,aat_theta0_1,aat_theta0_2,
+    bb13t_k,bb13t_r10,bb13t_r30
+  };
+  const int ncoeffs = sizeof(coeffs)/sizeof(coeffs[0]);
+
+  // only proc 0 touches the file, entry 0 of each array is skipped
+
+  if (comm->me == 0) {
+    for (int m = 0; m < ncoeffs; m++)
+      fread(coeffs[m]+1,sizeof(double),ntypes,fp);
+  }
+
+  // all procs receive proc 0's values
+
+  for (int m = 0; m < ncoeffs; m++)
+    MPI_Bcast(coeffs[m]+1,ntypes,MPI_DOUBLE,0,world);
+
+  // only the overall flag is set here, per-section flags stay cleared
+
+  for (int itype = 1; itype <= ntypes; itype++) setflag[itype] = 1;
+}
--- a/src/CLASS2/dihedral_class2.h
+++ b/src/CLASS2/dihedral_class2.h
@ -0,0 +1,46 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef DIHEDRAL_CLASS2_H
+#define DIHEDRAL_CLASS2_H
+
+#include "stdio.h"
+#include "dihedral.h"
+
+// Class2 dihedral style: a Dihedral whose per-type coefficients are split
+// into six sections (see coeff()), each stored in arrays indexed by type
+
+class DihedralClass2 : public Dihedral {
+ public:
+  DihedralClass2();
+  ~DihedralClass2();
+
+  void compute(int, int);
+
+  // set one coefficient section (first arg, 0-5) for one or more types
+  void coeff(int, int, char **);
+
+  // save / restore all coefficient arrays
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+
+ private:
+  // Dihedral coeffs, phi's are stored in radians
+  double *k1;
+  double *k2;
+  double *k3;
+  double *phi1;
+  double *phi2;
+  double *phi3;
+
+  // MiddleBondTorsion coeffs
+  double *mbt_f1;
+  double *mbt_f2;
+  double *mbt_f3;
+  double *mbt_r0;
+
+  // EndBondTorsion coeffs, _1 and _2 sets
+  double *ebt_f1_1;
+  double *ebt_f2_1;
+  double *ebt_f3_1;
+  double *ebt_r0_1;
+  double *ebt_f1_2;
+  double *ebt_f2_2;
+  double *ebt_f3_2;
+  double *ebt_r0_2;
+
+  // AngleTorsion coeffs, _1 and _2 sets, theta0's are stored in radians
+  double *at_f1_1;
+  double *at_f2_1;
+  double *at_f3_1;
+  double *at_theta0_1;
+  double *at_f1_2;
+  double *at_f2_2;
+  double *at_f3_2;
+  double *at_theta0_2;
+
+  // AngleAngleTorsion coeffs, theta0's are stored in radians
+  double *aat_k;
+  double *aat_theta0_1;
+  double *aat_theta0_2;
+
+  // BondBond13Torsion coeffs
+  double *bb13t_k;
+  double *bb13t_r10;
+  double *bb13t_r30;
+
+  // per-type flags, one per section, set to 1 by coeff()
+  int *setflag_d;
+  int *setflag_mbt;
+  int *setflag_ebt;
+  int *setflag_at;
+  int *setflag_aat;
+  int *setflag_bb13t;
+
+  // used by coeff() for the degree -> radian conversion
+  double PI;
+
+  void allocate();
+};
+
+#endif
--- a/src/CLASS2/improper_class2.cpp
+++ b/src/CLASS2/improper_class2.cpp
@ -0,0 +1,891 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Eric Simon (Cray)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "improper_class2.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "update.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+
+#define SMALL 0.001
+
+/* ----------------------------------------------------------------------
+   constructor: store pi, used by coeff() to convert degrees to radians
+------------------------------------------------------------------------- */
+
+ImproperClass2::ImproperClass2()
+{
+  // atan(1) is pi/4
+  const double quarter_pi = atan(1.0);
+  PI = 4.0*quarter_pi;
+}
+
+/* ----------------------------------------------------------------------
+   destructor: release the arrays created by allocate()
+------------------------------------------------------------------------- */
+
+ImproperClass2::~ImproperClass2()
+{
+  // the arrays exist only once allocate() has set the flag
+  if (!allocated) return;
+
+  // "coefficients have been set" flags
+  memory->sfree(setflag);
+  memory->sfree(setflag_i);
+  memory->sfree(setflag_aa);
+
+  // improper coefficients
+  memory->sfree(k0);
+  memory->sfree(chi0);
+
+  // angle-angle coefficients
+  memory->sfree(aa_k1);
+  memory->sfree(aa_k2);
+  memory->sfree(aa_k3);
+  memory->sfree(aa_theta0_1);
+  memory->sfree(aa_theta0_2);
+  memory->sfree(aa_theta0_3);
+}
+
+/* ----------------------------------------------------------------------
+   compute improper energy, forces and virial for every improper in the
+   neighbor list, then add the angle-angle terms via angleangle()
+   atoms i1,i2,i3,i4 are A,B,C,D; all bond vectors are measured from B
+   chi = average of the three out-of-plane angles ABCD, CBDA, DBAC
+   E = k0 * (chi - chi0)^2
+   eflag != 0 -> accumulate energy, vflag != 0 -> accumulate virial
+------------------------------------------------------------------------- */
+
+void ImproperClass2::compute(int eflag, int vflag)
+{
+  int i1,i2,i3,i4,i,j,k,n,type,factor;
+  double rfactor;
+  double delr[3][3],rmag[3],rinvmag[3],rmag2[3];
+  double theta[3],costheta[3],sintheta[3];
+  double cossqtheta[3],sinsqtheta[3],invstheta[3];
+  double rABxrCB[3],rDBxrAB[3],rCBxrDB[3];
+  double ddelr[3][4],dr[3][4][3],dinvr[3][4][3];
+  double dthetadr[3][4][3],dinvsth[3][4][3];
+  double dinv3r[4][3],dinvs3r[3][4][3];
+  double drCBxrDB[3],rCBxdrDB[3],drDBxrAB[3],rDBxdrAB[3];
+  double drABxrCB[3],rABxdrCB[3];
+  double dot1,dot2,dd[3];
+  double fdot[3][4][3],f2[3][4][3],invs3r[3],inv3r;
+  double t,tt1,tt3,sc1;
+  double dotCBDBAB,dotDBABCB,dotABCBDB;
+  double chi,deltachi,d2chi,cossin2;
+  double drAB[3][4][3],drCB[3][4][3],drDB[3][4][3];
+  double dchi[3][4][3],dtotalchi[4][3];
+  double schiABCD,chiABCD,schiCBDA,chiCBDA,schiDBAC,chiDBAC;
+  double fabcd[4][3];
+
+  // reset the accumulators; angleangle() adds to them afterwards
+
+  energy = 0.0;
+  if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0;
+
+  // zero the derivative tables once, before the improper loop
+  // entries assigned inside the loop are rewritten on every pass,
+  // all other entries keep the 0.0 set here
+
+  for (i = 0; i < 3; i++)
+    for (j = 0; j < 4; j++)
+      for (k = 0; k < 3; k++) {
+	dthetadr[i][j][k] = 0.0;
+	drAB[i][j][k] = 0.0;
+	drCB[i][j][k] = 0.0;
+	drDB[i][j][k] = 0.0;
+      }
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int **improperlist = neighbor->improperlist;
+  int nimproperlist = neighbor->nimproperlist;
+  int nlocal = atom->nlocal;
+  int newton_bond = force->newton_bond;
+
+  for (n = 0; n < nimproperlist; n++) {
+
+    // list entry = 4 atom indices (A,B,C,D) followed by the improper type
+
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // zero force constant: nothing to compute here for this improper
+    // (angleangle() loops over the list again on its own)
+
+    if (k0[type] == 0.0) continue;
+
+    // factor = how many of the 4 atoms have index < nlocal (all 4 if
+    // newton_bond is on); rfactor scales this improper's energy and virial
+
+    if (newton_bond) factor = 4;
+    else {
+      factor = 0;
+      if (i1 < nlocal) factor++;
+      if (i2 < nlocal) factor++;
+      if (i3 < nlocal) factor++;
+      if (i4 < nlocal) factor++;
+      }
+    rfactor = 0.25 * factor;
+
+    // difference vectors
+    // delr[0] = A-B, delr[1] = C-B, delr[2] = D-B, each passed through
+    // domain->minimum_image()
+
+    delr[0][0] = x[i1][0] - x[i2][0];
+    delr[0][1] = x[i1][1] - x[i2][1];
+    delr[0][2] = x[i1][2] - x[i2][2];
+    domain->minimum_image(&delr[0][0],&delr[0][1],&delr[0][2]);
+
+    delr[1][0] = x[i3][0] - x[i2][0];
+    delr[1][1] = x[i3][1] - x[i2][1];
+    delr[1][2] = x[i3][2] - x[i2][2];
+    domain->minimum_image(&delr[1][0],&delr[1][1],&delr[1][2]);
+
+    delr[2][0] = x[i4][0] - x[i2][0];
+    delr[2][1] = x[i4][1] - x[i2][1];
+    delr[2][2] = x[i4][2] - x[i2][2];
+    domain->minimum_image(&delr[2][0],&delr[2][1],&delr[2][2]);
+
+    // bond lengths and associated values
+
+    for (i = 0; i < 3; i++) {
+      rmag2[i] = delr[i][0]*delr[i][0] + delr[i][1]*delr[i][1] + 
+	delr[i][2]*delr[i][2];
+      rmag[i] = sqrt(rmag2[i]);
+      rinvmag[i] = 1.0/rmag[i];
+    }
+
+    // angle ABC, CBD, ABD
+
+    costheta[0] = (delr[0][0]*delr[1][0] + delr[0][1]*delr[1][1] +  
+		   delr[0][2]*delr[1][2]) / (rmag[0]*rmag[1]);
+    costheta[1] = (delr[1][0]*delr[2][0] + delr[1][1]*delr[2][1] + 
+		   delr[1][2]*delr[2][2]) / (rmag[1]*rmag[2]);
+    costheta[2] = (delr[0][0]*delr[2][0] + delr[0][1]*delr[2][1] + 
+		   delr[0][2]*delr[2][2]) / (rmag[0]*rmag[2]);
+
+    // angle error check
+    // prints a diagnostic when a cosine is exactly -1.0
+    // nothing is aborted or skipped; the calculation continues below
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] == -1.0) {
+	int me;
+	MPI_Comm_rank(world,&me);
+	if (screen) {
+	  fprintf(screen,"Improper problem: %d %d %d %d %d %d\n",
+		  me,update->ntimestep,
+		  atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	  fprintf(screen,"  1st atom: %d %g %g %g\n",
+		  me,x[i1][0],x[i1][1],x[i1][2]);
+	  fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		  me,x[i2][0],x[i2][1],x[i2][2]);
+	  fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		  me,x[i3][0],x[i3][1],x[i3][2]);
+	  fprintf(screen,"  4th atom: %d %g %g %g\n",
+		  me,x[i4][0],x[i4][1],x[i4][2]);
+	}
+      }
+    }
+
+    // clamp each cosine into [-1,1] before acos()
+    // NOTE(review): a cosine of exactly 1.0 gives sintheta = 0.0, so
+    // invstheta (and sc1 further down) is a division by zero -- confirm
+    // whether collinear bonds can occur in practice
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] > 1.0)  costheta[i] = 1.0;
+      if (costheta[i] < -1.0) costheta[i] = -1.0;
+      theta[i] = acos(costheta[i]);
+      cossqtheta[i] = costheta[i]*costheta[i];
+      sintheta[i] = sin(theta[i]);
+      invstheta[i] = 1.0/sintheta[i];
+      sinsqtheta[i] = sintheta[i]*sintheta[i];
+    }
+
+    // cross & dot products
+
+    cross(delr[0],delr[1],rABxrCB);
+    cross(delr[2],delr[0],rDBxrAB);
+    cross(delr[1],delr[2],rCBxrDB);
+
+    dotCBDBAB = dot(rCBxrDB,delr[0]);
+    dotDBABCB = dot(rDBxrAB,delr[1]);
+    dotABCBDB = dot(rABxrCB,delr[2]);
+
+    // invs3r[m] = 1 / (sin(theta) * |r_AB| * |r_CB| * |r_DB|)
+    // using angle CBD for m = 0, ABD for m = 1, ABC for m = 2
+
+    t = rmag[0] * rmag[1] * rmag[2];
+    inv3r = 1.0/t;
+    invs3r[0] = invstheta[1] * inv3r;
+    invs3r[1] = invstheta[2] * inv3r;
+    invs3r[2] = invstheta[0] * inv3r;
+
+    // chi ABCD, CBDA, DBAC
+    // final chi is average of three
+
+    schiABCD = dotCBDBAB * invs3r[0];
+    chiABCD = asin(schiABCD);
+    schiCBDA = dotDBABCB * invs3r[1];
+    chiCBDA = asin(schiCBDA);
+    schiDBAC = dotABCBDB * invs3r[2];
+    chiDBAC = asin(schiDBAC);
+
+    chi = (chiABCD + chiCBDA + chiDBAC) / 3.0;
+    deltachi = chi - chi0[type];
+    d2chi = deltachi * deltachi;
+
+    // energy
+
+    if (eflag) energy += rfactor * k0[type] * d2chi;
+
+    // forces
+    // define d(delr)
+    // i = bond AB/CB/DB, j = atom A/B/C/D
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	ddelr[i][j] = 0.0;
+
+    ddelr[0][0] = 1.0;
+    ddelr[0][1] = -1.0;
+    ddelr[1][1] = -1.0;
+    ddelr[1][2] = 1.0;
+    ddelr[2][1] = -1.0;
+    ddelr[2][3] = 1.0;
+
+    // compute d(|r|)/dr and d(1/|r|)/dr for each direction, bond and atom
+    // define d(r) for each r
+    // i = bond AB/CB/DB, j = atom A/B/C/D, k = X/Y/Z
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++) {
+	  dr[i][j][k] = delr[i][k] * ddelr[i][j] / rmag[i];
+	  dinvr[i][j][k] = -dr[i][j][k] / rmag2[i];
+	}
+
+    // compute d(1 / (|r_AB| * |r_CB| * |r_DB|) / dr
+    // i = atom A/B/C/D, j = X/Y/Z
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	dinv3r[i][j] = rinvmag[1] * (rinvmag[2] * dinvr[0][i][j] +
+				     rinvmag[0] * dinvr[2][i][j]) +
+	  rinvmag[2] * rinvmag[0] * dinvr[1][i][j];
+
+    // compute d(theta)/d(r) for 3 angles
+    // dthetadr[angle][atom][direction]; the atom that is not part of an
+    // angle keeps the 0.0 set before the improper loop
+    // angleABC
+
+    tt1 = costheta[0] / rmag2[0];
+    tt3 = costheta[0] / rmag2[1];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[0]);
+
+    dthetadr[0][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[1][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[1][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[1][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[1][0] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][0]) - 
+				(delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[1][1] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][1]) - 
+				(delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[1][2] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][2]) - 
+				(delr[0][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][0] = sc1 * ((tt3 * delr[1][0]) - 
+			       (delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][1] = sc1 * ((tt3 * delr[1][1]) - 
+			       (delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][2] = sc1 * ((tt3 * delr[1][2]) - 
+			       (delr[0][2] * rinvmag[0] * rinvmag[1]));
+
+    // angleCBD
+
+    tt1 = costheta[1] / rmag2[1];
+    tt3 = costheta[1] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[1]);
+
+    dthetadr[1][2][0] = sc1 * ((tt1 * delr[1][0]) - 
+			       (delr[2][0] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][1] = sc1 * ((tt1 * delr[1][1]) - 
+			       (delr[2][1] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][2] = sc1 * ((tt1 * delr[1][2]) - 
+			       (delr[2][2] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][1][0] = -sc1 * ((tt1 * delr[1][0]) - 
+				(delr[2][0] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][1] = -sc1 * ((tt1 * delr[1][1]) - 
+				(delr[2][1] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][2] = -sc1 * ((tt1 * delr[1][2]) - 
+				(delr[2][2] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[1][2] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[1][2] * rinvmag[2] * rinvmag[1]));
+
+    // angleABD
+
+    tt1 = costheta[2] / rmag2[0];
+    tt3 = costheta[2] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[2]);
+
+    dthetadr[2][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[2][0] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[2][1] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[2][2] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[2][0] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[2][1] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[2][2] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[0][2] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[0][2] * rinvmag[2] * rinvmag[0]));
+
+    // compute d( 1 / sin(theta))/dr
+    // i = angle, j = atom, k = direction
+    // chain rule: d(1/sin)/dtheta = -cos/sin^2, times d(theta)/dr
+
+    for (i = 0; i < 3; i++) {
+      cossin2 = -costheta[i] / sinsqtheta[i];
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dinvsth[i][j][k] = cossin2 * dthetadr[i][j][k];
+    }
+
+    // compute d(1 / sin(theta) * |r_AB| * |r_CB| * |r_DB|)/dr
+    // i = angle, j = atom
+    // first index of dinvs3r uses the same angle pairing as invs3r above
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	dinvs3r[0][i][j] = (invstheta[1] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[1][i][j]);
+	dinvs3r[1][i][j] = (invstheta[2] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[2][i][j]);
+	dinvs3r[2][i][j] = (invstheta[0] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[0][i][j]);
+      }
+
+    // drCB(i,j,k), etc
+    // i = vector X'/Y'/Z', j = atom A/B/C/D, k = direction X/Y/Z
+    // only the k == i entries are set; the others were zeroed before the loop
+
+    for (i = 0; i < 3; i++) {
+      drCB[i][1][i] = -1.0;
+      drAB[i][1][i] = -1.0;
+      drDB[i][1][i] = -1.0;
+      drDB[i][3][i] = 1.0;
+      drCB[i][2][i] = 1.0;
+      drAB[i][0][i] = 1.0;
+    }
+
+    // d((r_CB x r_DB) dot r_AB)
+    // r_CB x d(r_DB)
+    // d(r_CB) x r_DB
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB)
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) dot r_AB
+    // d(r_AB) dot (r_CB x r_DB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[1],drDB[i][j],rCBxdrDB);
+	cross(drCB[i][j],delr[2],drCBxrDB);
+	for (k = 0; k < 3; k++) dd[k] = rCBxdrDB[k] + drCBxrDB[k];
+	dot1 = dot(dd,delr[0]);
+	dot2 = dot(rCBxrDB,drAB[i][j]);
+	fdot[0][j][i] = dot1 + dot2;
+      }
+
+    // d((r_DB x r_AB) dot r_CB)
+    // r_DB x d(r_AB)
+    // d(r_DB) x r_AB
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB)
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) dot r_CB
+    // d(r_CB) dot (r_DB x r_AB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[2],drAB[i][j],rDBxdrAB);
+	cross(drDB[i][j],delr[0],drDBxrAB);
+	for (k = 0; k < 3; k++) dd[k] = rDBxdrAB[k] + drDBxrAB[k];
+	dot1 = dot(dd,delr[1]);
+	dot2 = dot(rDBxrAB,drCB[i][j]);
+	fdot[1][j][i] = dot1 + dot2;
+      }
+
+    // d((r_AB x r_CB) dot r_DB)
+    // r_AB x d(r_CB)
+    // d(r_AB) x r_CB
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB)
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) dot r_DB
+    // d(r_DB) dot (r_AB x r_CB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[0],drCB[i][j],rABxdrCB);
+	cross(drAB[i][j],delr[1],drABxrCB);
+	for (k = 0; k < 3; k++) dd[k] = rABxdrCB[k] + drABxrCB[k];
+	dot1 = dot(dd,delr[2]);
+	dot2 = dot(rABxrCB,drDB[i][j]);
+	fdot[2][j][i] = dot1 + dot2;
+      }
+
+    // force on each atom
+    // f2 = d(sin(chi))/dr by the product rule; dividing by cos(chi) gives
+    // d(chi)/dr; dtotalchi averages the three angles like chi itself
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	f2[0][i][j] = (fdot[0][i][j] * invs3r[0]) + 
+	  (dinvs3r[0][i][j] * dotCBDBAB);
+	dchi[0][i][j] = f2[0][i][j] / cos(chiABCD);
+	f2[1][i][j] = (fdot[1][i][j] * invs3r[1]) + 
+	  (dinvs3r[1][i][j] * dotDBABCB);
+	dchi[1][i][j] = f2[1][i][j] / cos(chiCBDA);
+	f2[2][i][j] = (fdot[2][i][j] * invs3r[2]) + 
+	  (dinvs3r[2][i][j] * dotABCBDB);
+	dchi[2][i][j] = f2[2][i][j] / cos(chiDBAC);
+	dtotalchi[i][j] = (dchi[0][i][j]+dchi[1][i][j]+dchi[2][i][j]) / 3.0;
+      }
+
+    // F = -dE/dr = -2 * k0 * (chi - chi0) * d(chi)/dr
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = -2.0*k0[type] * deltachi*dtotalchi[i][j];
+
+    // apply force to each of 4 atoms
+    // with newton_bond off, only atoms with index < nlocal are updated
+
+    if (newton_bond || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (newton_bond || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (newton_bond || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (newton_bond || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    // virial contribution
+    // built from the B-relative vectors of A, C, D and their forces
+    // components are stored as xx, yy, zz, xy, xz, yz
+
+    if (vflag) {
+      virial[0] += rfactor * (delr[0][0]*fabcd[0][0] + 
+			      delr[1][0]*fabcd[2][0] + delr[2][0]*fabcd[3][0]);
+      virial[1] += rfactor * (delr[0][1]*fabcd[0][1] + 
+			      delr[1][1]*fabcd[2][1] + delr[2][1]*fabcd[3][1]);
+      virial[2] += rfactor * (delr[0][2]*fabcd[0][2] + 
+			      delr[1][2]*fabcd[2][2] + delr[2][2]*fabcd[3][2]);
+      virial[3] += rfactor * (delr[0][0]*fabcd[0][1] + 
+			      delr[1][0]*fabcd[2][1] + delr[2][0]*fabcd[3][1]);
+      virial[4] += rfactor * (delr[0][0]*fabcd[0][2] + 
+			      delr[1][0]*fabcd[2][2] + delr[2][0]*fabcd[3][2]);
+      virial[5] += rfactor * (delr[0][1]*fabcd[0][2] + 
+			      delr[1][1]*fabcd[2][2] + delr[2][1]*fabcd[3][2]);
+    }
+  }
+
+  // compute angle-angle interactions
+
+  angleangle(eflag,vflag);
+}
+
+/* ----------------------------------------------------------------------
+   allocate the per-type coefficient and flag arrays
+   each array has nimpropertypes+1 entries; types are indexed from 1
+   all three flag arrays start out cleared for types 1..nimpropertypes
+------------------------------------------------------------------------- */
+
+void ImproperClass2::allocate()
+{
+  allocated = 1;
+
+  const int ntypes = atom->nimpropertypes;
+  const size_t dbytes = (ntypes+1)*sizeof(double);
+  const size_t ibytes = (ntypes+1)*sizeof(int);
+
+  // improper coefficients
+
+  k0 = (double *) memory->smalloc(dbytes,"improper:k0");
+  chi0 = (double *) memory->smalloc(dbytes,"improper:chi0");
+
+  // angle-angle coefficients
+
+  aa_k1 = (double *) memory->smalloc(dbytes,"improper:aa_k1");
+  aa_k2 = (double *) memory->smalloc(dbytes,"improper:aa_k2");
+  aa_k3 = (double *) memory->smalloc(dbytes,"improper:aa_k3");
+  aa_theta0_1 = (double *) memory->smalloc(dbytes,"improper:aa_theta0_1");
+  aa_theta0_2 = (double *) memory->smalloc(dbytes,"improper:aa_theta0_2");
+  aa_theta0_3 = (double *) memory->smalloc(dbytes,"improper:aa_theta0_3");
+
+  // "coefficients have been set" flags
+
+  setflag = (int *) memory->smalloc(ibytes,"improper:setflag");
+  setflag_i = (int *) memory->smalloc(ibytes,"improper:setflag_i");
+  setflag_aa = (int *) memory->smalloc(ibytes,"improper:setflag_aa");
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    setflag_aa[itype] = 0;
+    setflag_i[itype] = 0;
+    setflag[itype] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+   which = 0 -> improper coeffs
+     arg = type range, k0, chi0 (degrees)                       narg = 3
+   which = 1 -> AngleAngle coeffs
+     arg = type range, k1, k2, k3, theta0_1, theta0_2, theta0_3 (degrees)
+                                                                narg = 7
+   angles are stored in radians
+   setflag for a type is turned on only once both groups have been set
+------------------------------------------------------------------------- */
+
+void ImproperClass2::coeff(int which, int narg, char **arg)
+{
+  if (which < 0 || which > 1)
+    error->all("Invalid coeffs for this improper style");
+
+  // check the argument count before any element of arg is read:
+  // force->bounds() below reads arg[0], which does not exist if narg is 0
+  // NOTE(review): this relies on error->all() not returning, as the rest
+  // of this function already does -- confirm
+
+  int nexpect = 3;
+  if (which == 1) nexpect = 7;
+  if (narg != nexpect) error->all("Incorrect args for improper coefficients");
+
+  if (!allocated) allocate();
+
+  // arg[0] selects the range of types ilo..ihi to set
+
+  int ilo,ihi;
+  force->bounds(arg[0],atom->nimpropertypes,ilo,ihi);
+
+  int count = 0;
+
+  if (which == 0) {
+    double k0_one = atof(arg[1]);
+    double chi0_one = atof(arg[2]);
+
+    // convert chi0 from degrees to radians
+
+    for (int i = ilo; i <= ihi; i++) {
+      k0[i] = k0_one;
+      chi0[i] = chi0_one/180.0 * PI;
+      setflag_i[i] = 1;
+      count++;
+    }
+  }
+
+  if (which == 1) {
+    double k1_one = atof(arg[1]);
+    double k2_one = atof(arg[2]);
+    double k3_one = atof(arg[3]);
+    double theta0_1_one = atof(arg[4]);
+    double theta0_2_one = atof(arg[5]);
+    double theta0_3_one = atof(arg[6]);
+
+    // convert theta0's from degrees to radians
+
+    for (int i = ilo; i <= ihi; i++) {
+      aa_k1[i] = k1_one;
+      aa_k2[i] = k2_one;
+      aa_k3[i] = k3_one;
+      aa_theta0_1[i] = theta0_1_one/180.0 * PI;
+      aa_theta0_2[i] = theta0_2_one/180.0 * PI;
+      aa_theta0_3[i] = theta0_3_one/180.0 * PI;
+      setflag_aa[i] = 1;
+      count++;
+    }
+  }
+
+  // an empty type range (ilo > ihi) set nothing
+
+  if (count == 0) error->all("Incorrect args for improper coefficients");
+
+  // a type is fully specified only when both coefficient groups are set
+
+  for (int i = ilo; i <= ihi; i++)
+    if (setflag_i[i] == 1 && setflag_aa[i] == 1) setflag[i] = 1;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file
+   each array is written as nimpropertypes doubles starting at index 1
+------------------------------------------------------------------------- */
+
+void ImproperClass2::write_restart(FILE *fp)
+{
+  const int ncoeff = 8;
+
+  // the order of this table is the on-disk order
+
+  double *coeffs[ncoeff] = {k0,chi0,aa_k1,aa_k2,aa_k3,
+                            aa_theta0_1,aa_theta0_2,aa_theta0_3};
+
+  for (int m = 0; m < ncoeff; m++)
+    fwrite(coeffs[m]+1,sizeof(double),atom->nimpropertypes,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them
+   every type 1..nimpropertypes is then flagged as set
+------------------------------------------------------------------------- */
+
+void ImproperClass2::read_restart(FILE *fp)
+{
+  allocate();
+
+  const int ncoeff = 8;
+
+  // built after allocate() so the pointers refer to the new arrays
+  // the order of this table is the order the arrays are read and broadcast
+
+  double *coeffs[ncoeff] = {k0,chi0,aa_k1,aa_k2,aa_k3,
+                            aa_theta0_1,aa_theta0_2,aa_theta0_3};
+
+  int m;
+
+  // only proc 0 reads from the file
+
+  if (comm->me == 0) {
+    for (m = 0; m < ncoeff; m++)
+      fread(coeffs[m]+1,sizeof(double),atom->nimpropertypes,fp);
+  }
+
+  // every proc takes part in the broadcasts
+
+  for (m = 0; m < ncoeff; m++)
+    MPI_Bcast(coeffs[m]+1,atom->nimpropertypes,MPI_DOUBLE,0,world);
+
+  int itype = 1;
+  while (itype <= atom->nimpropertypes) {
+    setflag[itype] = 1;
+    itype++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   angle-angle interactions within improper
+   atoms i1,i2,i3,i4 are A,B,C,D; the three angles ABC, CBD, ABD share B
+   E = k1*dABC*dCBD + k2*dABC*dABD + k3*dABD*dCBD
+   where dABC = thetaABC - aa_theta0_1, dCBD = thetaCBD - aa_theta0_2,
+         dABD = thetaABD - aa_theta0_3
+   adds to energy (if eflag), to the forces, and to virial (if vflag)
+   energy and virial are not reset here
+------------------------------------------------------------------------- */
+
+void ImproperClass2::angleangle(int eflag, int vflag)
+{
+  int i1,i2,i3,i4,i,j,k,n,type,factor;
+  double rfactor;
+  double delxAB,delyAB,delzAB,rABmag2,rAB;
+  double delxBC,delyBC,delzBC,rBCmag2,rBC;
+  double delxBD,delyBD,delzBD,rBDmag2,rBD;
+  double costhABC,thetaABC,costhABD;
+  double thetaABD,costhCBD,thetaCBD,dthABC,dthCBD,dthABD;
+  double sc1,t1,t3,r12;
+  double dthetadr[3][4][3],fabcd[4][3];
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int **improperlist = neighbor->improperlist;
+  int nimproperlist = neighbor->nimproperlist;
+  int nlocal = atom->nlocal;
+  int newton_bond = force->newton_bond;
+
+  for (n = 0; n < nimproperlist; n++) {
+
+    // list entry = 4 atom indices (A,B,C,D) followed by the improper type
+
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // factor = how many of the 4 atoms have index < nlocal (all 4 if
+    // newton_bond is on); rfactor scales this improper's energy and virial
+
+    if (newton_bond) factor = 4;
+    else {
+      factor = 0;
+      if (i1 < nlocal) factor++;
+      if (i2 < nlocal) factor++;
+      if (i3 < nlocal) factor++;
+      if (i4 < nlocal) factor++;
+      }
+    rfactor = 0.25 * factor;
+
+    // difference vectors
+    // AB = A-B, BC = C-B, BD = D-B, each passed through
+    // domain->minimum_image()
+
+    delxAB = x[i1][0] - x[i2][0];
+    delyAB = x[i1][1] - x[i2][1];
+    delzAB = x[i1][2] - x[i2][2];
+    domain->minimum_image(&delxAB,&delyAB,&delzAB);
+
+    delxBC = x[i3][0] - x[i2][0];
+    delyBC = x[i3][1] - x[i2][1];
+    delzBC = x[i3][2] - x[i2][2];
+    domain->minimum_image(&delxBC,&delyBC,&delzBC);
+
+    delxBD = x[i4][0] - x[i2][0];
+    delyBD = x[i4][1] - x[i2][1];
+    delzBD = x[i4][2] - x[i2][2];
+    domain->minimum_image(&delxBD,&delyBD,&delzBD);
+
+    // bond lengths
+
+    rABmag2 = delxAB*delxAB + delyAB*delyAB + delzAB*delzAB;
+    rAB = sqrt(rABmag2);
+    rBCmag2 = delxBC*delxBC + delyBC*delyBC + delzBC*delzBC;
+    rBC = sqrt(rBCmag2);
+    rBDmag2 = delxBD*delxBD + delyBD*delyBD + delzBD*delzBD;
+    rBD = sqrt(rBDmag2);
+        
+    // angle ABC, ABD, CBD
+    // each cosine is clamped into [-1,1] before acos()
+
+    costhABC = (delxAB*delxBC + delyAB*delyBC + delzAB*delzBC) / (rAB * rBC);
+    if (costhABC > 1.0)  costhABC = 1.0;
+    if (costhABC < -1.0) costhABC = -1.0;
+    thetaABC = acos(costhABC);
+
+    costhABD = (delxAB*delxBD + delyAB*delyBD + delzAB*delzBD) / (rAB * rBD);
+    if (costhABD > 1.0)  costhABD = 1.0;
+    if (costhABD < -1.0) costhABD = -1.0;
+    thetaABD = acos(costhABD);
+
+    costhCBD = (delxBC*delxBD + delyBC*delyBD + delzBC*delzBD) /(rBC * rBD);
+    if (costhCBD > 1.0)  costhCBD = 1.0;
+    if (costhCBD < -1.0) costhCBD = -1.0;
+    thetaCBD = acos(costhCBD);
+
+    // deviation of each angle from its reference value
+
+    dthABC = thetaABC - aa_theta0_1[type];
+    dthCBD = thetaCBD - aa_theta0_2[type];
+    dthABD = thetaABD - aa_theta0_3[type];
+
+    // energy
+
+    if (eflag) energy += rfactor * ((aa_k2[type] * dthABC * dthABD) + 
+				    (aa_k1[type] * dthABC * dthCBD) +
+				    (aa_k3[type] * dthABD * dthCBD));
+
+    // d(theta)/d(r) array
+    // angle i, atom j, coordinate k
+    // cleared on every pass; the atom not in an angle keeps 0.0
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dthetadr[i][j][k] = 0.0;
+
+    // angle ABC
+    // sc1 = 1/sin(theta)
+    // NOTE(review): sc1 is a division by zero when the clamped cosine is
+    // exactly +/-1.0 -- confirm whether collinear bonds can occur
+
+    sc1 = sqrt(1.0/(1.0 - costhABC*costhABC));
+    t1 = costhABC / rABmag2;
+    t3 = costhABC / rBCmag2;
+    r12 = 1.0 / (rAB * rBC);
+
+    dthetadr[0][0][0] = sc1 * ((t1 * delxAB) - (delxBC * r12));
+    dthetadr[0][0][1] = sc1 * ((t1 * delyAB) - (delyBC * r12));
+    dthetadr[0][0][2] = sc1 * ((t1 * delzAB) - (delzBC * r12));
+    dthetadr[0][1][0] = sc1 * ((-t1 * delxAB) + (delxBC * r12) +
+			       (-t3 * delxBC) + (delxAB * r12));
+    dthetadr[0][1][1] = sc1 * ((-t1 * delyAB) + (delyBC * r12) +
+			       (-t3 * delyBC) + (delyAB * r12));
+    dthetadr[0][1][2] = sc1 * ((-t1 * delzAB) + (delzBC * r12) +
+			       (-t3 * delzBC) + (delzAB * r12));
+    dthetadr[0][2][0] = sc1 * ((t3 * delxBC) - (delxAB * r12));
+    dthetadr[0][2][1] = sc1 * ((t3 * delyBC) - (delyAB * r12));
+    dthetadr[0][2][2] = sc1 * ((t3 * delzBC) - (delzAB * r12));
+
+    // angle CBD
+
+    sc1 = sqrt(1.0/(1.0 - costhCBD*costhCBD));
+    t1 = costhCBD / rBCmag2;
+    t3 = costhCBD / rBDmag2;
+    r12 = 1.0 / (rBC * rBD);
+
+    dthetadr[1][2][0] = sc1 * ((t1 * delxBC) - (delxBD * r12));
+    dthetadr[1][2][1] = sc1 * ((t1 * delyBC) - (delyBD * r12));
+    dthetadr[1][2][2] = sc1 * ((t1 * delzBC) - (delzBD * r12));
+    dthetadr[1][1][0] = sc1 * ((-t1 * delxBC) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxBC * r12));
+    dthetadr[1][1][1] = sc1 * ((-t1 * delyBC) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyBC * r12));
+    dthetadr[1][1][2] = sc1 * ((-t1 * delzBC) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzBC * r12));
+    dthetadr[1][3][0] = sc1 * ((t3 * delxBD) - (delxBC * r12));
+    dthetadr[1][3][1] = sc1 * ((t3 * delyBD) - (delyBC * r12));
+    dthetadr[1][3][2] = sc1 * ((t3 * delzBD) - (delzBC * r12));
+
+    // angle ABD
+
+    sc1 = sqrt(1.0/(1.0 - costhABD*costhABD));
+    t1 = costhABD / rABmag2;
+    t3 = costhABD / rBDmag2;
+    r12 = 1.0 / (rAB * rBD);
+
+    dthetadr[2][0][0] = sc1 * ((t1 * delxAB) - (delxBD * r12));
+    dthetadr[2][0][1] = sc1 * ((t1 * delyAB) - (delyBD * r12));
+    dthetadr[2][0][2] = sc1 * ((t1 * delzAB) - (delzBD * r12));
+    dthetadr[2][1][0] = sc1 * ((-t1 * delxAB) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxAB * r12));
+    dthetadr[2][1][1] = sc1 * ((-t1 * delyAB) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyAB * r12));
+    dthetadr[2][1][2] = sc1 * ((-t1 * delzAB) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzAB * r12));
+    dthetadr[2][3][0] = sc1 * ((t3 * delxBD) - (delxAB * r12));
+    dthetadr[2][3][1] = sc1 * ((t3 * delyBD) - (delyAB * r12));
+    dthetadr[2][3][2] = sc1 * ((t3 * delzBD) - (delzAB * r12));
+
+    // angleangle forces
+    // F = -dE/dr, product rule applied to each of the three energy terms
+    // dthetadr first index: 0 = ABC, 1 = CBD, 2 = ABD
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = - 
+	  ((aa_k1[type] * 
+	    (dthABC*dthetadr[1][i][j] + dthCBD*dthetadr[0][i][j])) +
+	   (aa_k2[type] * 
+	    (dthABC*dthetadr[2][i][j] + dthABD*dthetadr[0][i][j])) +
+	   (aa_k3[type] *
+	    (dthABD*dthetadr[1][i][j] + dthCBD*dthetadr[2][i][j])));
+
+    // apply force to each of 4 atoms
+    // with newton_bond off, only atoms with index < nlocal are updated
+
+    if (newton_bond || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (newton_bond || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (newton_bond || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (newton_bond || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    // virial contribution
+    // built from the B-relative vectors of A, C, D and their forces
+    // components are stored as xx, yy, zz, xy, xz, yz
+
+    if (vflag) {
+      virial[0] += rfactor * (delxAB*fabcd[0][0] + 
+			      delxBC*fabcd[2][0] + delxBD*fabcd[3][0]);
+      virial[1] += rfactor * (delyAB*fabcd[0][1] + 
+			      delyBC*fabcd[2][1] + delyBD*fabcd[3][1]);
+      virial[2] += rfactor * (delzAB*fabcd[0][2] + 
+			      delzBC*fabcd[2][2] + delzBD*fabcd[3][2]);
+      virial[3] += rfactor * (delxAB*fabcd[0][1] + 
+			      delxBC*fabcd[2][1] + delxBD*fabcd[3][1]);
+      virial[4] += rfactor * (delxAB*fabcd[0][2] + 
+			      delxBC*fabcd[2][2] + delxBD*fabcd[3][2]);
+      virial[5] += rfactor * (delyAB*fabcd[0][2] + 
+			      delyBC*fabcd[2][2] + delyBD*fabcd[3][2]);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   cross product of two 3-vectors: c = a x b
+   components are written in the order c[0], c[1], c[2]
+------------------------------------------------------------------------- */
+
+void ImproperClass2::cross(double *a, double *b, double *c)
+{
+  // for component m use the next two indices in cyclic order 0,1,2
+
+  for (int m = 0; m < 3; m++) {
+    const int p = (m+1) % 3;
+    const int q = (m+2) % 3;
+    c[m] = a[p]*b[q] - a[q]*b[p];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   dot product of two 3-vectors: returns a . b
+------------------------------------------------------------------------- */
+
+double ImproperClass2::dot(double *a, double *b)
+{
+  // accumulate in the order x, y, z
+
+  double sum = a[0]*b[0];
+  sum += a[1]*b[1];
+  sum += a[2]*b[2];
+  return sum;
+}
--- a/src/CLASS2/improper_class2.h
+++ b/src/CLASS2/improper_class2.h
@ -0,0 +1,41 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef IMPROPER_CLASS2_H
+#define IMPROPER_CLASS2_H
+
+#include "stdio.h"
+#include "improper.h"
+
+// Improper style implemented in improper_class2.cpp:
+// a harmonic term in an averaged out-of-plane angle, E = k0*(chi-chi0)^2,
+// plus angle-angle cross terms between the three angles at the central atom
+class ImproperClass2 : public Improper {
+ public:
+  ImproperClass2();
+  ~ImproperClass2();
+  void compute(int, int);            // (eflag, vflag)
+  void coeff(int, int, char **);     // (which, narg, arg)
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+
+ private:
+  // per-type arrays: nimpropertypes+1 entries each, indexed from type 1
+  // angles are stored in radians (coeff() converts from degrees)
+  double *k0,*chi0;                  // improper force constant, reference chi
+  // angle-angle force constants and reference angles
+  double *aa_k1,*aa_k2,*aa_k3,*aa_theta0_1,*aa_theta0_2,*aa_theta0_3;
+  // set to 1 by coeff() when the improper / angle-angle group has been given
+  int *setflag_i,*setflag_aa;
+  double PI;                         // 4*atan(1), set in the constructor
+
+  void allocate();                   // create and clear the arrays above
+  void angleangle(int, int);         // angle-angle terms, called by compute()
+  void cross(double *, double *, double *);   // c = a x b
+  double dot(double *, double *);             // returns a . b
+};
+
+#endif
--- a/src/CLASS2/pair_lj_class2.cpp
+++ b/src/CLASS2/pair_lj_class2.cpp
@ -0,0 +1,383 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "pair_lj_class2.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+// function-like min/max macros
+// each argument is expanded twice, so avoid arguments with side effects
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ----------------------------------------------------------------------
+   release the per-type-pair arrays created by allocate()
+   nothing is freed unless the allocated flag is set
+------------------------------------------------------------------------- */
+
+PairLJClass2::~PairLJClass2()
+{
+  if (!allocated) return;
+
+  // bookkeeping arrays
+
+  memory->destroy_2d_int_array(setflag);
+  memory->destroy_2d_double_array(cutsq);
+
+  // user-supplied coefficients
+
+  memory->destroy_2d_double_array(cut);
+  memory->destroy_2d_double_array(epsilon);
+  memory->destroy_2d_double_array(sigma);
+
+  // values derived in init_one()
+
+  memory->destroy_2d_double_array(lj1);
+  memory->destroy_2d_double_array(lj2);
+  memory->destroy_2d_double_array(lj3);
+  memory->destroy_2d_double_array(lj4);
+  memory->destroy_2d_double_array(offset);
+}
+
+/* ----------------------------------------------------------------------
+   pairwise 9-6 LJ forces over the neighbor lists of the nlocal atoms
+   eflag != 0: also accumulate the pair energy into eng_vdwl
+   vflag == 1: accumulate the virial pair by pair
+   vflag == 2: forces are summed into update->f_pair instead of atom->f
+               and virial_compute() is called after the loop
+------------------------------------------------------------------------- */
+
+void PairLJClass2::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double rsq,rinv,r2inv,r3inv,r6inv,forcelj,fforce,factor_lj,philj;
+  int *neighs;
+  double **f;
+
+  // reset accumulators for this call
+
+  eng_vdwl = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  // pick the force array to accumulate into
+
+  if (vflag == 2) f = update->f_pair;
+  else f = atom->f;
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      // entries >= nall encode a special_lj index in j/nall
+      // j % nall recovers the atom index
+
+      if (j < nall) factor_lj = 1.0;
+      else {
+	factor_lj = special_lj[j/nall];
+	j %= nall;
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	// forcelj = r^-6 * (lj1 * r^-3 - lj2)
+	// fforce is the value that multiplies the separation vector
+
+	r2inv = 1.0/rsq;
+	rinv = sqrt(r2inv);
+	r3inv = r2inv*rinv;
+	r6inv = r3inv*r3inv;
+	forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+	fforce = factor_lj*forcelj*r2inv;
+
+	f[i][0] += delx*fforce;
+	f[i][1] += dely*fforce;
+	f[i][2] += delz*fforce;
+
+	// the reaction force on j is skipped only when newton_pair is off
+	// and j >= nlocal
+
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= delx*fforce;
+	  f[j][1] -= dely*fforce;
+	  f[j][2] -= delz*fforce;
+	}
+
+	// energy gets full weight when the force on j was applied,
+	// half weight otherwise
+
+	if (eflag) {
+	  philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	    offset[itype][jtype];
+	  if (newton_pair || j < nlocal) eng_vdwl += factor_lj*philj;
+	  else eng_vdwl += 0.5*factor_lj*philj;
+	}
+
+	// pairwise virial, same full/half weighting as the energy
+	// component order: xx, yy, zz, xy, xz, yz
+
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] += delx*delx*fforce;
+	    virial[1] += dely*dely*fforce;
+	    virial[2] += delz*delz*fforce;
+	    virial[3] += delx*dely*fforce;
+	    virial[4] += delx*delz*fforce;
+	    virial[5] += dely*delz*fforce;
+	  } else {
+	    virial[0] += 0.5*delx*delx*fforce;
+	    virial[1] += 0.5*dely*dely*fforce;
+	    virial[2] += 0.5*delz*delz*fforce;
+	    virial[3] += 0.5*delx*dely*fforce;
+	    virial[4] += 0.5*delx*delz*fforce;
+	    virial[5] += 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   create all per-type-pair arrays, each (ntypes+1) x (ntypes+1)
+   sets the allocated flag and clears setflag for all I <= J with I >= 1
+------------------------------------------------------------------------- */
+
+void PairLJClass2::allocate()
+{
+  allocated = 1;
+
+  // one extra row and column, since the loops below start at index 1
+
+  const int dim = atom->ntypes + 1;
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  for (int row = 1; row < dim; row++) {
+    for (int col = row; col < dim; col++) {
+      setflag[row][col] = 0;
+    }
+  }
+
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+
+  // user-supplied coefficients
+
+  cut = memory->create_2d_double_array(dim,dim,"pair:cut");
+  epsilon = memory->create_2d_double_array(dim,dim,"pair:epsilon");
+  sigma = memory->create_2d_double_array(dim,dim,"pair:sigma");
+
+  // values filled in by init_one()
+
+  lj1 = memory->create_2d_double_array(dim,dim,"pair:lj1");
+  lj2 = memory->create_2d_double_array(dim,dim,"pair:lj2");
+  lj3 = memory->create_2d_double_array(dim,dim,"pair:lj3");
+  lj4 = memory->create_2d_double_array(dim,dim,"pair:lj4");
+  offset = memory->create_2d_double_array(dim,dim,"pair:offset");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   arg[0] = global cutoff
+   if arrays are already allocated, every explicitly set type pair,
+   including the like-type I,I pairs, has its cutoff reset to the new
+   global value
+------------------------------------------------------------------------- */
+
+void PairLJClass2::settings(int narg, char **arg)
+{
+  if (narg != 1) error->all("Illegal pair_style command");
+
+  cut_global = atof(arg[0]);
+
+  // reset cutoffs that have been explicitly set
+  // start at j = i so the diagonal I,I entries are reset as well;
+  // init_one() mixes unset I,J cutoffs from cut[i][i] and cut[j][j],
+  // so a stale diagonal would also leak into mixed pairs
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (setflag[i][j]) cut[i][j] = cut_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   arg[0],arg[1] = I,J type ranges, expanded by force->bounds()
+   arg[2] = epsilon, arg[3] = sigma
+   arg[4] = optional cutoff, defaults to the global cutoff
+   only pairs with I <= J are stored
+------------------------------------------------------------------------- */
+
+void PairLJClass2::coeff(int narg, char **arg)
+{
+  if (narg < 4 || narg > 5) error->all("Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  const double eps_arg = atof(arg[2]);
+  const double sig_arg = atof(arg[3]);
+  const double cut_arg = (narg == 5) ? atof(arg[4]) : cut_global;
+
+  // fill the upper triangle of the requested I,J block
+
+  int npairs = 0;
+  for (int itype = ilo; itype <= ihi; itype++) {
+    const int jstart = (jlo > itype) ? jlo : itype;
+    for (int jtype = jstart; jtype <= jhi; jtype++) {
+      epsilon[itype][jtype] = eps_arg;
+      sigma[itype][jtype] = sig_arg;
+      cut[itype][jtype] = cut_arg;
+      setflag[itype][jtype] = 1;
+      npairs++;
+    }
+  }
+
+  // the ranges selected no pair with I <= J
+
+  if (npairs == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   mixes epsilon,sigma,cut if the pair was never set explicitly
+   fills lj1-lj4 and offset for both i,j and j,i
+   sets etail_ij,ptail_ij if tail_flag is on
+   returns the cutoff for the pair
+------------------------------------------------------------------------- */
+
+double PairLJClass2::init_one(int i, int j)
+{
+  // epsilon,sigma are always mixed with the sixth-power rule
+  // the cutoff is mixed by mix_distance(), i.e. the user-defined rule
+
+  if (!setflag[i][j]) {
+    const double sig_i = sigma[i][i];
+    const double sig_j = sigma[j][j];
+    epsilon[i][j] = 2.0 * sqrt(epsilon[i][i]*epsilon[j][j]) *
+      pow(sig_i,3.0) * pow(sig_j,3.0) /
+      (pow(sig_i,6.0) + pow(sig_j,6.0));
+    sigma[i][j] =
+      pow((0.5 * (pow(sig_i,6.0) + pow(sig_j,6.0))),1.0/6.0);
+    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
+  }
+
+  // prefactors used by compute() and single()
+
+  const double eps = epsilon[i][j];
+  const double sig = sigma[i][j];
+  const double sig_pow9 = pow(sig,9.0);
+  const double sig_pow6 = pow(sig,6.0);
+
+  lj1[i][j] = 18.0 * eps * sig_pow9;
+  lj2[i][j] = 18.0 * eps * sig_pow6;
+  lj3[i][j] = 2.0 * eps * sig_pow9;
+  lj4[i][j] = 3.0 * eps * sig_pow6;
+
+  // energy shift: value of the 9-6 potential at the cutoff, or zero
+
+  offset[i][j] = 0.0;
+  if (offset_flag) {
+    const double ratio = sig / cut[i][j];
+    offset[i][j] = eps * (2.0*pow(ratio,9.0) - 3.0*pow(ratio,6.0));
+  }
+
+  // mirror derived values into the j,i entries
+
+  lj1[j][i] = lj1[i][j];
+  lj2[j][i] = lj2[i][j];
+  lj3[j][i] = lj3[i][j];
+  lj4[j][i] = lj4[i][j];
+  offset[j][i] = offset[i][j];
+
+  // compute I,J contribution to long-range tail correction
+  // count total # of atoms of type I and J via Allreduce
+
+  if (tail_flag) {
+    const int *type = atom->type;
+    const int nlocal = atom->nlocal;
+
+    double mine[2],total[2];
+    mine[0] = mine[1] = 0.0;
+    for (int m = 0; m < nlocal; m++) {
+      if (type[m] == i) mine[0] += 1.0;
+      if (type[m] == j) mine[1] += 1.0;
+    }
+    MPI_Allreduce(mine,total,2,MPI_DOUBLE,MPI_SUM,world);
+
+    const double PI = 4.0*atan(1.0);
+    const double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
+    const double sig6 = sig3*sig3;
+    const double rc3 = cut[i][j]*cut[i][j]*cut[i][j];
+    const double rc6 = rc3*rc3;
+    etail_ij = 2.0*PI*total[0]*total[1]*epsilon[i][j] *
+      sig6 * (sig3 - 3.0*rc3) / (3.0*rc6);
+    ptail_ij = 2.0*PI*total[0]*total[1]*epsilon[i][j] *
+      sig6 * (sig3 - 2.0*rc3) / rc6;
+  }
+
+  return cut[i][j];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   layout: global settings, then for every I <= J the setflag value,
+   followed by epsilon,sigma,cut only when the flag is set
+   there is no rank check here, so the caller is expected to invoke
+   this on proc 0 only
+------------------------------------------------------------------------- */
+
+void PairLJClass2::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
+      if (!setflag[itype][jtype]) continue;
+
+      fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
+      fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut[itype][jtype],sizeof(double),1,fp);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   reads the records in the order write_restart() emits them
+   only proc 0 touches fp; every value is then broadcast from proc 0
+------------------------------------------------------------------------- */
+
+void PairLJClass2::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  const int me = comm->me;
+  const int ntypes = atom->ntypes;
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      int *flag = &setflag[itype][jtype];
+      if (me == 0) fread(flag,sizeof(int),1,fp);
+      MPI_Bcast(flag,1,MPI_INT,0,world);
+
+      // coefficients are stored only for pairs that were set
+
+      if (*flag == 0) continue;
+
+      double *eps = &epsilon[itype][jtype];
+      double *sig = &sigma[itype][jtype];
+      double *rc = &cut[itype][jtype];
+      if (me == 0) {
+        fread(eps,sizeof(double),1,fp);
+        fread(sig,sizeof(double),1,fp);
+        fread(rc,sizeof(double),1,fp);
+      }
+      MPI_Bcast(eps,1,MPI_DOUBLE,0,world);
+      MPI_Bcast(sig,1,MPI_DOUBLE,0,world);
+      MPI_Bcast(rc,1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   record order: cut_global (double), offset_flag (int), mix_flag (int)
+------------------------------------------------------------------------- */
+
+void PairLJClass2::write_restart_settings(FILE *fp)
+{
+  // global cutoff first
+
+  fwrite(&cut_global,sizeof(double),1,fp);
+
+  // then the two integer flags
+
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   record order matches write_restart_settings():
+   cut_global (double), offset_flag (int), mix_flag (int)
+------------------------------------------------------------------------- */
+
+void PairLJClass2::read_restart_settings(FILE *fp)
+{
+  // only proc 0 touches the file
+
+  if (comm->me == 0) {
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+
+  // all procs take part in the broadcasts
+
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   force and energy for a single pair at squared distance rsq
+   one.fforce is always set
+   one.eng_vdwl and one.eng_coul are set only if eflag is nonzero
+   i, j and factor_coul are not used by this style
+   no cutoff test is done here
+------------------------------------------------------------------------- */
+
+void PairLJClass2::single(int i, int j, int itype, int jtype, double rsq,
+		       double factor_coul, double factor_lj, int eflag,
+		       One &one)
+{
+  const double r2inv = 1.0/rsq;
+  const double rinv = sqrt(r2inv);
+  const double r3inv = r2inv*rinv;
+  const double r6inv = r3inv*r3inv;
+
+  const double forcelj =
+    r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+  one.fforce = factor_lj*forcelj*r2inv;
+
+  if (!eflag) return;
+
+  // shifted 9-6 energy; this style has no Coulombic part
+
+  const double philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+    offset[itype][jtype];
+  one.eng_vdwl = factor_lj*philj;
+  one.eng_coul = 0.0;
+}
--- a/src/CLASS2/pair_lj_class2.h
+++ b/src/CLASS2/pair_lj_class2.h
@ -0,0 +1,42 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_LJ_CLASS2_H
+#define PAIR_LJ_CLASS2_H
+
+#include "pair.h"
+
+// 9-6 LJ pair style of the CLASS2 package, derived from Pair
+// the methods are implemented in pair_lj_class2.cpp
+
+class PairLJClass2 : public Pair {
+ public:
+  PairLJClass2() {}
+  ~PairLJClass2();
+
+  void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+
+  void single(int, int, int, int, double, double, double, int, One &);
+
+ private:
+  double cut_global;     // global cutoff set by settings()
+  double **cut;          // cutoff per type pair
+  double **epsilon;      // coefficients per type pair
+  double **sigma;
+  double **lj1;          // prefactors and energy shift filled by init_one()
+  double **lj2;
+  double **lj3;
+  double **lj4;
+  double **offset;
+
+  void allocate();
+};
+
+#endif
--- a/src/CLASS2/pair_lj_class2_coul_cut.cpp
+++ b/src/CLASS2/pair_lj_class2_coul_cut.cpp
@ -0,0 +1,452 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "pair_lj_class2_coul_cut.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+// function-like min/max macros
+// each argument is expanded twice, so avoid arguments with side effects
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ----------------------------------------------------------------------
+   release the per-type-pair arrays created by allocate()
+   nothing is freed unless the allocated flag is set
+------------------------------------------------------------------------- */
+
+PairLJClass2CoulCut::~PairLJClass2CoulCut()
+{
+  if (!allocated) return;
+
+  // bookkeeping arrays
+
+  memory->destroy_2d_int_array(setflag);
+  memory->destroy_2d_double_array(cutsq);
+
+  // LJ and Coulombic cutoffs and their squares
+
+  memory->destroy_2d_double_array(cut_lj);
+  memory->destroy_2d_double_array(cut_ljsq);
+  memory->destroy_2d_double_array(cut_coul);
+  memory->destroy_2d_double_array(cut_coulsq);
+
+  // user-supplied coefficients
+
+  memory->destroy_2d_double_array(epsilon);
+  memory->destroy_2d_double_array(sigma);
+
+  // values derived in init_one()
+
+  memory->destroy_2d_double_array(lj1);
+  memory->destroy_2d_double_array(lj2);
+  memory->destroy_2d_double_array(lj3);
+  memory->destroy_2d_double_array(lj4);
+  memory->destroy_2d_double_array(offset);
+}
+
+/* ----------------------------------------------------------------------
+   pairwise 9-6 LJ plus cutoff Coulombic forces over the neighbor lists
+   of the nlocal atoms
+   eflag != 0: also accumulate energies into eng_vdwl and eng_coul
+   vflag == 1: accumulate the virial pair by pair
+   vflag == 2: forces are summed into update->f_pair instead of atom->f
+               and virial_compute() is called after the loop
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
+  double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,fforce;
+  double factor_coul,factor_lj,factor,phicoul,philj;
+  int *neighs;
+  double **f;
+
+  // reset accumulators for this call
+
+  eng_vdwl = eng_coul = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  // pick the force array to accumulate into
+
+  if (vflag == 2) f = update->f_pair;
+  else f = atom->f;
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      // entries >= nall encode a special_coul/special_lj index in j/nall
+      // j % nall recovers the atom index
+
+      if (j < nall) factor_coul = factor_lj = 1.0;
+      else {
+	factor_coul = special_coul[j/nall];
+	factor_lj = special_lj[j/nall];
+	j %= nall;
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      // cutsq is the outer test; each term then has its own cutoff
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	// Coulombic term: qqrd2e * qi*qj / r
+
+	if (rsq < cut_coulsq[itype][jtype])
+	  forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+	else forcecoul = 0.0;
+
+	// LJ term: r^-6 * (lj1 * r^-3 - lj2)
+	// r3inv,r6inv are only assigned inside this cutoff
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  rinv = sqrt(r2inv);
+	  r3inv = r2inv*rinv;
+	  r6inv = r3inv*r3inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+	} else forcelj = 0.0;
+
+	// fforce is the value that multiplies the separation vector
+
+	fforce = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
+
+	f[i][0] += delx*fforce;
+	f[i][1] += dely*fforce;
+	f[i][2] += delz*fforce;
+
+	// the reaction force on j is skipped only when newton_pair is off
+	// and j >= nlocal
+
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= delx*fforce;
+	  f[j][1] -= dely*fforce;
+	  f[j][2] -= delz*fforce;
+	}
+
+	// energies get full weight when the force on j was applied,
+	// half weight otherwise
+
+	if (eflag) {
+	  if (newton_pair || j < nlocal) factor = 1.0;
+	  else factor = 0.5;
+	  if (rsq < cut_coulsq[itype][jtype]) {
+	    phicoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+	    eng_coul += factor*factor_coul*phicoul;
+	  }
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    eng_vdwl += factor*factor_lj*philj;
+	  }
+	}
+
+	// pairwise virial, same full/half weighting as the energy
+	// component order: xx, yy, zz, xy, xz, yz
+
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] += delx*delx*fforce;
+	    virial[1] += dely*dely*fforce;
+	    virial[2] += delz*delz*fforce;
+	    virial[3] += delx*dely*fforce;
+	    virial[4] += delx*delz*fforce;
+	    virial[5] += dely*delz*fforce;
+	  } else {
+	    virial[0] += 0.5*delx*delx*fforce;
+	    virial[1] += 0.5*dely*dely*fforce;
+	    virial[2] += 0.5*delz*delz*fforce;
+	    virial[3] += 0.5*delx*dely*fforce;
+	    virial[4] += 0.5*delx*delz*fforce;
+	    virial[5] += 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   create all per-type-pair arrays, each (ntypes+1) x (ntypes+1)
+   sets the allocated flag and clears setflag for all I <= J with I >= 1
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::allocate()
+{
+  allocated = 1;
+
+  // one extra row and column, since the loops below start at index 1
+
+  const int dim = atom->ntypes + 1;
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  for (int row = 1; row < dim; row++) {
+    for (int col = row; col < dim; col++) {
+      setflag[row][col] = 0;
+    }
+  }
+
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+
+  // LJ and Coulombic cutoffs and their squares
+
+  cut_lj = memory->create_2d_double_array(dim,dim,"pair:cut_lj");
+  cut_ljsq = memory->create_2d_double_array(dim,dim,"pair:cut_ljsq");
+  cut_coul = memory->create_2d_double_array(dim,dim,"pair:cut_coul");
+  cut_coulsq = memory->create_2d_double_array(dim,dim,"pair:cut_coulsq");
+
+  // user-supplied coefficients
+
+  epsilon = memory->create_2d_double_array(dim,dim,"pair:epsilon");
+  sigma = memory->create_2d_double_array(dim,dim,"pair:sigma");
+
+  // values filled in by init_one()
+
+  lj1 = memory->create_2d_double_array(dim,dim,"pair:lj1");
+  lj2 = memory->create_2d_double_array(dim,dim,"pair:lj2");
+  lj3 = memory->create_2d_double_array(dim,dim,"pair:lj3");
+  lj4 = memory->create_2d_double_array(dim,dim,"pair:lj4");
+  offset = memory->create_2d_double_array(dim,dim,"pair:offset");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   arg[0] = global LJ cutoff
+   arg[1] = optional global Coulombic cutoff, defaults to the LJ cutoff
+   if arrays are already allocated, every explicitly set type pair,
+   including the like-type I,I pairs, has both cutoffs reset to the new
+   global values
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::settings(int narg, char **arg)
+{
+  if (narg < 1 || narg > 2) error->all("Illegal pair_style command");
+
+  cut_lj_global = atof(arg[0]);
+  if (narg == 1) cut_coul_global = cut_lj_global;
+  else cut_coul_global = atof(arg[1]);
+
+  // reset cutoffs that have been explicitly set
+  // start at j = i so the diagonal I,I entries are reset as well;
+  // init_one() mixes unset I,J cutoffs from the I,I and J,J entries,
+  // so a stale diagonal would also leak into mixed pairs
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (setflag[i][j]) {
+          cut_lj[i][j] = cut_lj_global;
+          cut_coul[i][j] = cut_coul_global;
+        }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   arg[0],arg[1] = I,J type ranges, expanded by force->bounds()
+   arg[2] = epsilon, arg[3] = sigma
+   arg[4] = optional cutoff, used for both LJ and Coulombic
+   arg[5] = optional Coulombic cutoff, overrides arg[4] for Coulombic
+   missing cutoffs default to the global values
+   only pairs with I <= J are stored
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::coeff(int narg, char **arg)
+{
+  if (narg < 4 || narg > 6) error->all("Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  const double eps_arg = atof(arg[2]);
+  const double sig_arg = atof(arg[3]);
+
+  double lj_cut_arg = cut_lj_global;
+  double coul_cut_arg = cut_coul_global;
+  if (narg >= 5) {
+    lj_cut_arg = atof(arg[4]);
+    coul_cut_arg = lj_cut_arg;
+  }
+  if (narg == 6) coul_cut_arg = atof(arg[5]);
+
+  // fill the upper triangle of the requested I,J block
+
+  int npairs = 0;
+  for (int itype = ilo; itype <= ihi; itype++) {
+    const int jstart = (jlo > itype) ? jlo : itype;
+    for (int jtype = jstart; jtype <= jhi; jtype++) {
+      epsilon[itype][jtype] = eps_arg;
+      sigma[itype][jtype] = sig_arg;
+      cut_lj[itype][jtype] = lj_cut_arg;
+      cut_coul[itype][jtype] = coul_cut_arg;
+      setflag[itype][jtype] = 1;
+      npairs++;
+    }
+  }
+
+  // the ranges selected no pair with I <= J
+
+  if (npairs == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   mixes epsilon,sigma and both cutoffs if the pair was never set
+   fills cut_ljsq,cut_coulsq,lj1-lj4,offset for both i,j and j,i
+   sets etail_ij,ptail_ij if tail_flag is on
+   returns the larger of the LJ and Coulombic cutoffs
+------------------------------------------------------------------------- */
+
+double PairLJClass2CoulCut::init_one(int i, int j)
+{
+  // epsilon,sigma are always mixed with the sixth-power rule
+  // cutoffs are mixed by mix_distance(), i.e. the user-defined rule
+
+  if (!setflag[i][j]) {
+    const double sig_i = sigma[i][i];
+    const double sig_j = sigma[j][j];
+    epsilon[i][j] = 2.0 * sqrt(epsilon[i][i]*epsilon[j][j]) *
+      pow(sig_i,3.0) * pow(sig_j,3.0) /
+      (pow(sig_i,6.0) + pow(sig_j,6.0));
+    sigma[i][j] =
+      pow((0.5 * (pow(sig_i,6.0) + pow(sig_j,6.0))),1.0/6.0);
+    cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
+    cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
+  }
+
+  // overall cutoff and the squared per-term cutoffs
+
+  const double rc_lj = cut_lj[i][j];
+  const double rc_coul = cut_coul[i][j];
+  const double cut_max = (rc_lj > rc_coul) ? rc_lj : rc_coul;
+  cut_ljsq[i][j] = rc_lj * rc_lj;
+  cut_coulsq[i][j] = rc_coul * rc_coul;
+
+  // prefactors used by compute() and single()
+
+  const double eps = epsilon[i][j];
+  const double sig = sigma[i][j];
+  const double sig_pow9 = pow(sig,9.0);
+  const double sig_pow6 = pow(sig,6.0);
+
+  lj1[i][j] = 18.0 * eps * sig_pow9;
+  lj2[i][j] = 18.0 * eps * sig_pow6;
+  lj3[i][j] = 2.0 * eps * sig_pow9;
+  lj4[i][j] = 3.0 * eps * sig_pow6;
+
+  // energy shift: value of the 9-6 potential at the LJ cutoff, or zero
+
+  offset[i][j] = 0.0;
+  if (offset_flag) {
+    const double ratio = sig / rc_lj;
+    offset[i][j] = eps * (2.0*pow(ratio,9.0) - 3.0*pow(ratio,6.0));
+  }
+
+  // mirror derived values into the j,i entries
+
+  cut_ljsq[j][i] = cut_ljsq[i][j];
+  cut_coulsq[j][i] = cut_coulsq[i][j];
+  lj1[j][i] = lj1[i][j];
+  lj2[j][i] = lj2[i][j];
+  lj3[j][i] = lj3[i][j];
+  lj4[j][i] = lj4[i][j];
+  offset[j][i] = offset[i][j];
+
+  // compute I,J contribution to long-range tail correction
+  // count total # of atoms of type I and J via Allreduce
+
+  if (tail_flag) {
+    const int *type = atom->type;
+    const int nlocal = atom->nlocal;
+
+    double mine[2],total[2];
+    mine[0] = mine[1] = 0.0;
+    for (int m = 0; m < nlocal; m++) {
+      if (type[m] == i) mine[0] += 1.0;
+      if (type[m] == j) mine[1] += 1.0;
+    }
+    MPI_Allreduce(mine,total,2,MPI_DOUBLE,MPI_SUM,world);
+
+    const double PI = 4.0*atan(1.0);
+    const double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
+    const double sig6 = sig3*sig3;
+    const double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
+    const double rc6 = rc3*rc3;
+    etail_ij = 2.0*PI*total[0]*total[1]*epsilon[i][j] *
+      sig6 * (sig3 - 3.0*rc3) / (3.0*rc6);
+    ptail_ij = 2.0*PI*total[0]*total[1]*epsilon[i][j] *
+      sig6 * (sig3 - 2.0*rc3) / rc6;
+  }
+
+  return cut_max;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   reports an error unless atom->charge_allow is nonzero
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::init_style()
+{
+  // this style reads atom->q, so the atom style must define charge
+
+  if (!atom->charge_allow) {
+    error->all("Must use charged atom style with this pair style");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   layout: global settings, then for every I <= J the setflag value,
+   followed by epsilon,sigma,cut_lj,cut_coul only when the flag is set
+   there is no rank check here, so the caller is expected to invoke
+   this on proc 0 only
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
+      if (!setflag[itype][jtype]) continue;
+
+      fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
+      fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   reads the records in the order write_restart() emits them
+   only proc 0 touches fp; every value is then broadcast from proc 0
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  const int me = comm->me;
+  const int ntypes = atom->ntypes;
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      int *flag = &setflag[itype][jtype];
+      if (me == 0) fread(flag,sizeof(int),1,fp);
+      MPI_Bcast(flag,1,MPI_INT,0,world);
+
+      // coefficients are stored only for pairs that were set
+
+      if (*flag == 0) continue;
+
+      double *eps = &epsilon[itype][jtype];
+      double *sig = &sigma[itype][jtype];
+      double *rc_lj = &cut_lj[itype][jtype];
+      double *rc_coul = &cut_coul[itype][jtype];
+      if (me == 0) {
+        fread(eps,sizeof(double),1,fp);
+        fread(sig,sizeof(double),1,fp);
+        fread(rc_lj,sizeof(double),1,fp);
+        fread(rc_coul,sizeof(double),1,fp);
+      }
+      MPI_Bcast(eps,1,MPI_DOUBLE,0,world);
+      MPI_Bcast(sig,1,MPI_DOUBLE,0,world);
+      MPI_Bcast(rc_lj,1,MPI_DOUBLE,0,world);
+      MPI_Bcast(rc_coul,1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   record order: cut_lj_global (double), cut_coul_global (double),
+   offset_flag (int), mix_flag (int)
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::write_restart_settings(FILE *fp)
+{
+  // the two global cutoffs first
+
+  fwrite(&cut_lj_global,sizeof(double),1,fp);
+  fwrite(&cut_coul_global,sizeof(double),1,fp);
+
+  // then the two integer flags
+
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   record order matches write_restart_settings():
+   cut_lj_global (double), cut_coul_global (double),
+   offset_flag (int), mix_flag (int)
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::read_restart_settings(FILE *fp)
+{
+  // only proc 0 touches the file
+
+  const int me = comm->me;
+  if (me == 0) {
+    fread(&cut_lj_global,sizeof(double),1,fp);
+    fread(&cut_coul_global,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+
+  // all procs take part in the broadcasts
+
+  MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   force and energy for a single pair i,j at squared distance rsq
+   each term is included only inside its own cutoff
+   one.fforce is always set
+   one.eng_coul and one.eng_vdwl are set only if eflag is nonzero
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulCut::single(int i, int j, int itype, int jtype,
+				 double rsq, double factor_coul,
+				 double factor_lj, int eflag, One &one)
+{
+  const double r2inv = 1.0/rsq;
+  const int within_coul = (rsq < cut_coulsq[itype][jtype]);
+  const int within_lj = (rsq < cut_ljsq[itype][jtype]);
+
+  // Coulombic term: qqrd2e * qi*qj / r
+
+  double forcecoul = 0.0;
+  if (within_coul)
+    forcecoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
+
+  // LJ term; r3inv,r6inv are only meaningful inside the LJ cutoff
+
+  double r3inv = 0.0;
+  double r6inv = 0.0;
+  double forcelj = 0.0;
+  if (within_lj) {
+    const double rinv = sqrt(r2inv);
+    r3inv = r2inv*rinv;
+    r6inv = r3inv*r3inv;
+    forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+  }
+
+  one.fforce = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
+
+  if (!eflag) return;
+
+  // energies default to zero outside the respective cutoff
+
+  one.eng_coul = 0.0;
+  if (within_coul) {
+    const double phicoul =
+      force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
+    one.eng_coul = factor_coul*phicoul;
+  }
+
+  one.eng_vdwl = 0.0;
+  if (within_lj) {
+    const double philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+      offset[itype][jtype];
+    one.eng_vdwl = factor_lj*philj;
+  }
+}
--- a/src/CLASS2/pair_lj_class2_coul_cut.h
+++ b/src/CLASS2/pair_lj_class2_coul_cut.h
@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_LJ_CLASS2_COUL_CUT_H
+#define PAIR_LJ_CLASS2_COUL_CUT_H
+
+#include "pair.h"
+
+// 9-6 LJ plus cutoff Coulombic pair style of the CLASS2 package,
+// derived from Pair
+// the methods are implemented in pair_lj_class2_coul_cut.cpp
+
+class PairLJClass2CoulCut : public Pair {
+ public:
+  PairLJClass2CoulCut() {}
+  ~PairLJClass2CoulCut();
+
+  void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void init_style();
+
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+
+  void single(int, int, int, int, double, double, double, int, One &);
+
+ private:
+  double cut_lj_global;     // global cutoffs set by settings()
+  double cut_coul_global;
+  double **cut_lj;          // LJ cutoff per type pair and its square
+  double **cut_ljsq;
+  double **cut_coul;        // Coulombic cutoff per type pair and its square
+  double **cut_coulsq;
+  double **epsilon;         // coefficients per type pair
+  double **sigma;
+  double **lj1;             // prefactors and energy shift from init_one()
+  double **lj2;
+  double **lj3;
+  double **lj4;
+  double **offset;
+
+  void allocate();
+};
+
+#endif
--- a/src/CLASS2/pair_lj_class2_coul_long.cpp
+++ b/src/CLASS2/pair_lj_class2_coul_long.cpp
@ -0,0 +1,476 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_lj_class2_coul_long.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "kspace.h"
+#include "update.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+// function-like macros: each argument may be evaluated twice
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+// constants for the polynomial erfc() approximation used in compute()
+// and single():
+//   t = 1/(1 + EWALD_P*x)
+//   erfc(x) ~= t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-x*x)
+// EWALD_F scales the grij*exp(-grij*grij) term of the Coulombic force;
+// its value is numerically 2/sqrt(pi)
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ----------------------------------------------------------------------
+   free all arrays 
+------------------------------------------------------------------------- */
+
+PairLJClass2CoulLong::~PairLJClass2CoulLong()
+{
+  // the tables exist only once allocate() has set the allocated flag
+
+  if (!allocated) return;
+
+  memory->destroy_2d_int_array(setflag);
+  memory->destroy_2d_double_array(cutsq);
+
+  // release the per-type-pair coefficient tables
+  // in the same order as the explicit call sequence they replace
+
+  double **tables[] = {cut_lj,cut_ljsq,epsilon,sigma,lj1,lj2,lj3,lj4,offset};
+  const int ntables = sizeof(tables)/sizeof(tables[0]);
+  for (int itable = 0; itable < ntables; itable++)
+    memory->destroy_2d_double_array(tables[itable]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Accumulate pairwise forces for all neighbors of the local atoms:
+// a 9-6 LJ term inside the per-type-pair LJ cutoff plus the erfc-damped
+// Coulombic term inside the global Coulombic cutoff.
+//   eflag != 0 : also accumulate eng_vdwl and eng_coul
+//   vflag == 1 : accumulate the virial pair by pair
+//   vflag == 2 : write forces into update->f_pair and call
+//                virial_compute() once at the end
+
+void PairLJClass2CoulLong::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
+  double rsq,r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,fforce;
+  double grij,expm2,prefactor,t,erfc;
+  double factor_coul,factor_lj,factor,phicoul,philj;
+  int *neighs;
+  double **f;
+
+  eng_vdwl = eng_coul = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  if (vflag == 2) f = update->f_pair;
+  else f = atom->f;
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      // a neighbor index >= nall encodes a special-bond weight:
+      // j/nall selects the weight, j % nall is the atom index
+
+      if (j < nall) factor_coul = factor_lj = 1.0;
+      else {
+	factor_coul = special_coul[j/nall];
+	factor_lj = special_lj[j/nall];
+	j %= nall;
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	// Coulombic term: erfc via the polynomial approximation,
+	// then subtract the (1-factor_coul) share of the bare q*q/r term
+
+	if (rsq < cut_coulsq) {
+	  r = sqrt(rsq);
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+
+	// 9-6 LJ term: lj1/r^9 - lj2/r^6
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  rinv = sqrt(r2inv);
+	  r3inv = r2inv*rinv;
+	  r6inv = r3inv*r3inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+	} else forcelj = 0.0;
+
+	// only the LJ part is scaled here; the Coulombic weight was
+	// already applied above
+
+	fforce = (forcecoul + factor_lj*forcelj) * r2inv;
+
+	f[i][0] += delx*fforce;
+	f[i][1] += dely*fforce;
+	f[i][2] += delz*fforce;
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= delx*fforce;
+	  f[j][1] -= dely*fforce;
+	  f[j][2] -= delz*fforce;
+	}
+
+	// with newton_pair off and j not local, tally half the pair energy
+
+	if (eflag) {
+	  if (newton_pair || j < nlocal) factor = 1.0;
+	  else factor = 0.5;
+	  if (rsq < cut_coulsq) {
+	    phicoul = prefactor*erfc;
+	    if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+	    eng_coul += factor*phicoul;
+	  }
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    eng_vdwl += factor*factor_lj*philj;
+	  }
+	}
+
+	// pairwise virial, halved in the same case as the energy
+
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] += delx*delx*fforce;
+	    virial[1] += dely*dely*fforce;
+	    virial[2] += delz*delz*fforce;
+	    virial[3] += delx*dely*fforce;
+	    virial[4] += delx*delz*fforce;
+	    virial[5] += dely*delz*fforce;
+	  } else {
+	    virial[0] += 0.5*delx*delx*fforce;
+	    virial[1] += 0.5*dely*dely*fforce;
+	    virial[2] += 0.5*delz*delz*fforce;
+	    virial[3] += 0.5*delx*dely*fforce;
+	    virial[4] += 0.5*delx*delz*fforce;
+	    virial[5] += 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays 
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulLong::allocate()
+{
+  allocated = 1;
+
+  // tables are indexed 1..ntypes, so each dimension has ntypes+1 entries
+
+  const int dim = atom->ntypes + 1;
+
+  // only the j >= i half of setflag is cleared here
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  for (int itype = 1; itype < dim; itype++) {
+    for (int jtype = itype; jtype < dim; jtype++) {
+      setflag[itype][jtype] = 0;
+    }
+  }
+
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+
+  cut_lj = memory->create_2d_double_array(dim,dim,"pair:cut_lj");
+  cut_ljsq = memory->create_2d_double_array(dim,dim,"pair:cut_ljsq");
+  epsilon = memory->create_2d_double_array(dim,dim,"pair:epsilon");
+  sigma = memory->create_2d_double_array(dim,dim,"pair:sigma");
+  lj1 = memory->create_2d_double_array(dim,dim,"pair:lj1");
+  lj2 = memory->create_2d_double_array(dim,dim,"pair:lj2");
+  lj3 = memory->create_2d_double_array(dim,dim,"pair:lj3");
+  lj4 = memory->create_2d_double_array(dim,dim,"pair:lj4");
+  offset = memory->create_2d_double_array(dim,dim,"pair:offset");
+}
+
+/* ----------------------------------------------------------------------
+   global settings 
+------------------------------------------------------------------------- */
+
+// Parse the pair_style arguments.
+//   arg[0] = global LJ cutoff
+//   arg[1] = Coulombic cutoff (optional, defaults to the LJ cutoff)
+// If coefficient tables already exist, every type pair whose
+// coefficients were set explicitly has its LJ cutoff reset to the
+// new global value.
+
+void PairLJClass2CoulLong::settings(int narg, char **arg)
+{
+  if (narg < 1 || narg > 2) error->all("Illegal pair_style command");
+
+  cut_lj_global = atof(arg[0]);
+  if (narg == 1) cut_coul = cut_lj_global;
+  else cut_coul = atof(arg[1]);
+
+  // reset cutoffs that have been explicitly set
+  // start at j = i so that like-type pairs (i,i) are reset as well
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+	if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+// Set coefficients for a range of type pairs.
+//   arg[0],arg[1] = type ranges I and J, expanded by force->bounds()
+//   arg[2] = epsilon
+//   arg[3] = sigma
+//   arg[4] = LJ cutoff (optional, defaults to the global LJ cutoff)
+// Only pairs with j >= i are stored.  It is an error if the ranges
+// select no such pair.
+
+void PairLJClass2CoulLong::coeff(int narg, char **arg)
+{
+  // at most one optional cutoff exists for this style, so a 6th
+  // argument is rejected instead of being silently ignored
+
+  if (narg < 4 || narg > 5) error->all("Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  double epsilon_one = atof(arg[2]);
+  double sigma_one = atof(arg[3]);
+
+  double cut_lj_one = cut_lj_global;
+  if (narg == 5) cut_lj_one = atof(arg[4]);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      epsilon[i][j] = epsilon_one;
+      sigma[i][j] = sigma_one;
+      cut_lj[i][j] = cut_lj_one;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+// Finalize the tables for type pair (i,j) and mirror the derived
+// entries into (j,i).  Mixes epsilon, sigma, and the LJ cutoff when
+// the pair was not set explicitly, fills the force/energy prefactors,
+// and optionally the energy offset and tail-correction terms.
+// Returns the larger of the pair's LJ cutoff and the Coulombic cutoff.
+
+double PairLJClass2CoulLong::init_one(int i, int j)
+{
+  // always mix epsilon,sigma via sixthpower rules
+  // mix distance via user-defined rule
+
+  if (setflag[i][j] == 0) {
+    epsilon[i][j] = 2.0 * sqrt(epsilon[i][i]*epsilon[j][j]) *
+      pow(sigma[i][i],3.0) * pow(sigma[j][j],3.0) / 
+      (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0));
+    sigma[i][j] = 
+      pow((0.5 * (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0))),1.0/6.0);
+    cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
+  }
+
+  double cut = MAX(cut_lj[i][j],cut_coul);
+  cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
+
+  // lj1,lj2 are the force prefactors and lj3,lj4 the energy prefactors
+  // of E = epsilon * (2 (sigma/r)^9 - 3 (sigma/r)^6)
+
+  lj1[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
+  lj2[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
+  lj3[i][j] = 2.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
+  lj4[i][j] = 3.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
+
+  // offset = LJ energy evaluated at the LJ cutoff
+
+  if (offset_flag) {
+    double ratio = sigma[i][j] / cut_lj[i][j];
+    offset[i][j] = epsilon[i][j] * (2.0*pow(ratio,9.0) - 3.0*pow(ratio,6.0));
+  } else offset[i][j] = 0.0;
+
+  // mirror derived quantities; epsilon, sigma, cut_lj are not mirrored here
+
+  cut_ljsq[j][i] = cut_ljsq[i][j];
+  lj1[j][i] = lj1[i][j];
+  lj2[j][i] = lj2[i][j];
+  lj3[j][i] = lj3[i][j];
+  lj4[j][i] = lj4[i][j];
+  offset[j][i] = offset[i][j];
+
+  // compute I,J contribution to long-range tail correction
+  // count total # of atoms of type I and J via Allreduce
+
+  if (tail_flag) {
+    int *type = atom->type;
+    int nlocal = atom->nlocal;
+
+    // counts are kept as doubles so one MPI_DOUBLE reduction sums both
+
+    double count[2],all[2];
+    count[0] = count[1] = 0.0;
+    for (int k = 0; k < nlocal; k++) {
+      if (type[k] == i) count[0] += 1.0;
+      if (type[k] == j) count[1] += 1.0;
+    }
+    MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
+        
+    double PI = 4.0*atan(1.0);
+    double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
+    double sig6 = sig3*sig3;
+    double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
+    double rc6 = rc3*rc3;
+    etail_ij = 2.0*PI*all[0]*all[1]*epsilon[i][j] *
+      sig6 * (sig3 - 3.0*rc3) / (3.0*rc6);
+    ptail_ij = 2.0*PI*all[0]*all[1]*epsilon[i][j] * 
+      sig6 * (sig3 - 2.0*rc3) / rc6;
+  } 
+
+  return cut;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulLong::init_style()
+{
+  // this style reads per-atom charges, so the atom style must allow them
+
+  if (atom->charge_allow == 0)
+    error->all("Must use charged atom style with this pair style");
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // a KSpace solver must be defined and must be ewald or pppm;
+  // either one supplies g_ewald
+
+  if (force->kspace == NULL)
+    error->all("Pair style is incompatible with KSpace style");
+  else {
+    const char *kstyle = force->kspace_style;
+    if (strcmp(kstyle,"ewald") == 0 || strcmp(kstyle,"pppm") == 0)
+      g_ewald = force->kspace->g_ewald;
+    else
+      error->all("Pair style is incompatible with KSpace style");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file 
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulLong::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  // one record per type pair with jtype >= itype:
+  // the set flag, then epsilon, sigma, LJ cutoff only if the flag is set
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
+      if (!setflag[itype][jtype]) continue;
+
+      fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
+      fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairLJClass2CoulLong::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  const int me = comm->me;
+  const int ntypes = atom->ntypes;
+
+  // same record layout as write_restart():
+  // proc 0 reads each value, then it is broadcast to all procs
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      int *flag = &setflag[itype][jtype];
+      if (me == 0) fread(flag,sizeof(int),1,fp);
+      MPI_Bcast(flag,1,MPI_INT,0,world);
+      if (!*flag) continue;
+
+      double *values[3] = {&epsilon[itype][jtype],&sigma[itype][jtype],
+			   &cut_lj[itype][jtype]};
+      if (me == 0)
+	for (int m = 0; m < 3; m++) fread(values[m],sizeof(double),1,fp);
+      for (int m = 0; m < 3; m++)
+	MPI_Bcast(values[m],1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+// Write the global settings as raw binary, in this order:
+// LJ cutoff, Coulombic cutoff (doubles), offset flag, mix flag (ints).
+// read_restart_settings() reads them back in the same order.
+
+void PairLJClass2CoulLong::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_lj_global,sizeof(double),1,fp);
+  fwrite(&cut_coul,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+// Read the global settings in the order write_restart_settings() wrote
+// them.  Only proc 0 touches the file; each value is then broadcast.
+// The fread() return values are not checked.
+
+void PairLJClass2CoulLong::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    fread(&cut_lj_global,sizeof(double),1,fp);
+    fread(&cut_coul,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Evaluate the interaction of one pair of atoms i,j at squared
+// separation rsq, using the same expressions as compute().
+//   one.fforce   = force magnitude divided by r
+//   one.eng_coul, one.eng_vdwl = pair energies, set only if eflag != 0
+// No check against the overall pair cutoff is made here.
+
+void PairLJClass2CoulLong::single(int i, int j, int itype, int jtype,
+				  double rsq, double factor_coul,
+				  double factor_lj, int eflag, One &one)
+{
+  double r2inv,r,rinv,r3inv,r6inv,grij,expm2,t,erfc,prefactor;
+  double forcecoul,forcelj,phicoul,philj;
+
+  r2inv = 1.0/rsq;
+
+  // Coulombic term with the polynomial erfc approximation;
+  // the (1-factor_coul) share of the bare q*q/r term is subtracted
+
+  if (rsq < cut_coulsq) {
+    r = sqrt(rsq);
+    grij = g_ewald * r;
+    expm2 = exp(-grij*grij);
+    t = 1.0 / (1.0 + EWALD_P*grij);
+    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
+    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+  } else forcecoul = 0.0;
+
+  // 9-6 LJ term
+
+  if (rsq < cut_ljsq[itype][jtype]) {
+    rinv = sqrt(r2inv);
+    r3inv = r2inv*rinv;
+    r6inv = r3inv*r3inv;
+    forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+  } else forcelj = 0.0;
+  one.fforce = (forcecoul + factor_lj*forcelj) * r2inv;
+
+  // the energy branches reuse prefactor, erfc, r3inv, r6inv, which are
+  // assigned only inside the matching cutoff tests above
+
+  if (eflag) {
+    if (rsq < cut_coulsq) {
+      phicoul = prefactor*erfc;
+      if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+      one.eng_coul = phicoul;
+    } else one.eng_coul = 0.0;
+    if (rsq < cut_ljsq[itype][jtype]) {
+      philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	offset[itype][jtype];
+      one.eng_vdwl = factor_lj*philj;
+    } else one.eng_vdwl = 0.0;
+  }
+}
--- a/src/CLASS2/style_class2.h
+++ b/src/CLASS2/style_class2.h
@ -0,0 +1,56 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// Each section below is active only when the including file defines
+// the matching macro.  The *Include sections pull in the class headers;
+// the *Class sections list one Style(name,Class) entry per class.
+
+// angle styles
+
+#ifdef AngleInclude
+#include "angle_class2.h"
+#endif
+
+#ifdef AngleClass
+AngleStyle(class2,AngleClass2)
+#endif
+
+// bond styles
+
+#ifdef BondInclude
+#include "bond_class2.h"
+#endif
+
+#ifdef BondClass
+BondStyle(class2,BondClass2)
+#endif
+
+// dihedral styles
+
+#ifdef DihedralInclude
+#include "dihedral_class2.h"
+#endif
+
+#ifdef DihedralClass
+DihedralStyle(class2,DihedralClass2)
+#endif
+
+// improper styles
+
+#ifdef ImproperInclude
+#include "improper_class2.h"
+#endif
+
+#ifdef ImproperClass
+ImproperStyle(class2,ImproperClass2)
+#endif
+
+// pair styles
+
+#ifdef PairInclude
+#include "pair_lj_class2.h"
+#include "pair_lj_class2_coul_cut.h"
+#include "pair_lj_class2_coul_long.h"
+#endif
+
+#ifdef PairClass
+PairStyle(lj/class2,PairLJClass2)
+PairStyle(lj/class2/coul/cut,PairLJClass2CoulCut)
+PairStyle(lj/class2/coul/long,PairLJClass2CoulLong)
+#endif
--- a/src/DPD/Install.csh
+++ b/src/DPD/Install.csh
@ -0,0 +1,24 @@
+# Install/unInstall package classes in LAMMPS
+#
+# The first argument selects the action:
+#   1 = copy the DPD package files into the parent directory
+#   0 = remove them from the parent directory
+# Any other value does nothing.
+
+if ($1 == 1) then
+
+  cp style_dpd.h ..
+
+  cp atom_dpd.cpp ..
+  cp pair_dpd.cpp ..
+
+  cp atom_dpd.h ..
+  cp pair_dpd.h ..
+
+else if ($1 == 0) then
+
+  # replace the style header with an empty file rather than deleting it
+
+  rm ../style_dpd.h
+  touch ../style_dpd.h
+
+  rm ../atom_dpd.cpp
+  rm ../pair_dpd.cpp
+
+  rm ../atom_dpd.h
+  rm ../pair_dpd.h
+
+endif
--- a/src/DPD/atom_dpd.cpp
+++ b/src/DPD/atom_dpd.cpp
@ -0,0 +1,236 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "atom_dpd.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+
+/* ---------------------------------------------------------------------- */
+
+// forwards both arguments to the Atom base constructor; no other setup
+
+AtomDPD::AtomDPD(int narg, char **arg) : Atom(narg, arg) {}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::copy(int i, int j)
+{
+  // copy all per-atom data of atom i into slot j
+
+  tag[j] = tag[i];
+  type[j] = type[i];
+  mask[j] = mask[i];
+  image[j] = image[i];
+
+  for (int idim = 0; idim < 3; idim++) x[j][idim] = x[i][idim];
+  for (int idim = 0; idim < 3; idim++) v[j][idim] = v[i][idim];
+
+  // forward the copy to every fix listed in extra_grow
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    modify->fix[extra_grow[iextra]]->copy_arrays(i,j);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::pack_comm(int n, int *list, double *buf, int *pbc_flags)
+{
+  // 6 values per listed atom: position, then velocity
+
+  double *out = buf;
+
+  if (pbc_flags[0] != 0) {
+
+    // shift each position by pbc_flags[1..3] box lengths
+
+    const double xprd = domain->xprd;
+    const double yprd = domain->yprd;
+    const double zprd = domain->zprd;
+    for (int ilist = 0; ilist < n; ilist++) {
+      const int iatom = list[ilist];
+      *out++ = x[iatom][0] + pbc_flags[1]*xprd;
+      *out++ = x[iatom][1] + pbc_flags[2]*yprd;
+      *out++ = x[iatom][2] + pbc_flags[3]*zprd;
+      *out++ = v[iatom][0];
+      *out++ = v[iatom][1];
+      *out++ = v[iatom][2];
+    }
+    return;
+  }
+
+  // no shift requested: copy positions unchanged
+
+  for (int ilist = 0; ilist < n; ilist++) {
+    const int iatom = list[ilist];
+    *out++ = x[iatom][0];
+    *out++ = x[iatom][1];
+    *out++ = x[iatom][2];
+    *out++ = v[iatom][0];
+    *out++ = v[iatom][1];
+    *out++ = v[iatom][2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::unpack_comm(int n, int first, double *buf)
+{
+  // inverse of pack_comm(): position then velocity for n consecutive
+  // atoms starting at index first
+
+  const double *in = buf;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; iatom++) {
+    for (int idim = 0; idim < 3; idim++) x[iatom][idim] = *in++;
+    for (int idim = 0; idim < 3; idim++) v[iatom][idim] = *in++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::pack_reverse(int n, int first, double *buf)
+{
+  // 3 force components for each of n consecutive atoms starting at first
+
+  double *out = buf;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; iatom++)
+    for (int idim = 0; idim < 3; idim++)
+      *out++ = f[iatom][idim];
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::unpack_reverse(int n, int *list, double *buf)
+{
+  // add (not overwrite) the received forces onto the listed atoms
+
+  const double *in = buf;
+
+  for (int ilist = 0; ilist < n; ilist++) {
+    const int iatom = list[ilist];
+    for (int idim = 0; idim < 3; idim++)
+      f[iatom][idim] += *in++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::pack_border(int n, int *list, double *buf, int *pbc_flags)
+{
+  // 9 values per listed atom: position, tag, type, mask, velocity
+
+  double *out = buf;
+
+  if (pbc_flags[0] != 0) {
+
+    // shift each position by pbc_flags[1..3] box lengths
+
+    const double xprd = domain->xprd;
+    const double yprd = domain->yprd;
+    const double zprd = domain->zprd;
+    for (int ilist = 0; ilist < n; ilist++) {
+      const int iatom = list[ilist];
+      *out++ = x[iatom][0] + pbc_flags[1]*xprd;
+      *out++ = x[iatom][1] + pbc_flags[2]*yprd;
+      *out++ = x[iatom][2] + pbc_flags[3]*zprd;
+      *out++ = tag[iatom];
+      *out++ = type[iatom];
+      *out++ = mask[iatom];
+      *out++ = v[iatom][0];
+      *out++ = v[iatom][1];
+      *out++ = v[iatom][2];
+    }
+    return;
+  }
+
+  // no shift requested: copy positions unchanged
+
+  for (int ilist = 0; ilist < n; ilist++) {
+    const int iatom = list[ilist];
+    *out++ = x[iatom][0];
+    *out++ = x[iatom][1];
+    *out++ = x[iatom][2];
+    *out++ = tag[iatom];
+    *out++ = type[iatom];
+    *out++ = mask[iatom];
+    *out++ = v[iatom][0];
+    *out++ = v[iatom][1];
+    *out++ = v[iatom][2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomDPD::unpack_border(int n, int first, double *buf)
+{
+  // inverse of pack_border() for n consecutive atoms starting at first
+
+  const double *in = buf;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; iatom++) {
+
+    // grow the per-atom arrays when the next slot is one past the end;
+    // the arrays are accessed through the members after grow() returns
+
+    if (iatom == nmax) grow(0);
+
+    for (int idim = 0; idim < 3; idim++) x[iatom][idim] = *in++;
+    tag[iatom] = static_cast<int> (*in++);
+    type[iatom] = static_cast<int> (*in++);
+    mask[iatom] = static_cast<int> (*in++);
+    for (int idim = 0; idim < 3; idim++) v[iatom][idim] = *in++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack all atom quantities for shipping to another proc
+   xyz must be 1st 3 values, so that comm::exchange can test on them 
+------------------------------------------------------------------------- */
+
+int AtomDPD::pack_exchange(int i, double *buf)
+{
+  // buf[0] is filled in last with the total number of values,
+  // so the atom data starts at index 1
+
+  int nvalues = 1;
+
+  for (int idim = 0; idim < 3; idim++) buf[nvalues++] = x[i][idim];
+  for (int idim = 0; idim < 3; idim++) buf[nvalues++] = v[i][idim];
+  buf[nvalues++] = tag[i];
+  buf[nvalues++] = type[i];
+  buf[nvalues++] = mask[i];
+  buf[nvalues++] = image[i];
+
+  // each fix listed in extra_grow appends its own values
+  // and reports how many it wrote
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    const int nfix =
+      modify->fix[extra_grow[iextra]]->pack_exchange(i,&buf[nvalues]);
+    nvalues += nfix;
+  }
+
+  buf[0] = nvalues;
+  return nvalues;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomDPD::unpack_exchange(double *buf)
+{
+  // the new atom is appended at index nlocal; grow the arrays if full
+
+  if (nlocal == nmax) grow(0);
+
+  // buf[0] holds the value count and is skipped here
+
+  int nvalues = 1;
+
+  for (int idim = 0; idim < 3; idim++) x[nlocal][idim] = buf[nvalues++];
+  for (int idim = 0; idim < 3; idim++) v[nlocal][idim] = buf[nvalues++];
+  tag[nlocal] = static_cast<int> (buf[nvalues++]);
+  type[nlocal] = static_cast<int> (buf[nvalues++]);
+  mask[nlocal] = static_cast<int> (buf[nvalues++]);
+  image[nlocal] = static_cast<int> (buf[nvalues++]);
+
+  // each fix listed in extra_grow consumes its own values
+  // and reports how many it read
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    const int nfix =
+      modify->fix[extra_grow[iextra]]->unpack_exchange(nlocal,&buf[nvalues]);
+    nvalues += nfix;
+  }
+
+  nlocal++;
+  return nvalues;
+}
--- a/src/DPD/atom_dpd.h
+++ b/src/DPD/atom_dpd.h
@ -0,0 +1,34 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef ATOM_DPD_H
+#define ATOM_DPD_H
+
+#include "atom.h"
+
+// Atom class whose communication routines carry velocities along with
+// positions (see atom_dpd.cpp): pack_comm/pack_border send v for each
+// atom in addition to x.
+
+class AtomDPD : public Atom {
+ public:
+  AtomDPD(int, char **);
+  ~AtomDPD() {}
+  void copy(int, int);                            // copy atom i into slot j
+  void pack_comm(int, int *, double *, int *);    // x,v of listed atoms
+  void unpack_comm(int, int, double *);
+  void pack_reverse(int, int, double *);          // f of consecutive atoms
+  void unpack_reverse(int, int *, double *);      // sums f into listed atoms
+  void pack_border(int, int *, double *, int *);  // x,tag,type,mask,v
+  void unpack_border(int, int, double *);
+  int pack_exchange(int, double *);               // returns # of values packed
+  int unpack_exchange(double *);                  // returns # of values read
+};
+
+#endif
--- a/src/DPD/pair_dpd.cpp
+++ b/src/DPD/pair_dpd.cpp
@ -0,0 +1,413 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Kurt Smith (U Pittsburgh)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "pair_dpd.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "random_mars.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+// function-like macros: each argument may be evaluated twice
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+// pairs closer than this distance are skipped in compute()
+
+#define EPSILON 1.0e-10
+
+/* ---------------------------------------------------------------------- */
+
+// the random number generator is created later, in settings()
+
+PairDPD::PairDPD()
+{
+  random = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays 
+------------------------------------------------------------------------- */
+
+PairDPD::~PairDPD()
+{
+  // the tables exist only once allocate() has set the allocated flag
+
+  if (allocated) {
+    memory->destroy_2d_int_array(setflag);
+    memory->destroy_2d_double_array(cutsq);
+
+    double **tables[] = {cut,a0,gamma,sigma};
+    const int ntables = sizeof(tables)/sizeof(tables[0]);
+    for (int itable = 0; itable < ntables; itable++)
+      memory->destroy_2d_double_array(tables[itable]);
+  }
+
+  // deleting a NULL pointer is a no-op, so no test is needed
+
+  delete random;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Accumulate DPD pair forces (conservative + drag + random) for all
+// neighbors of the local atoms.
+//   eflag != 0 : also accumulate eng_vdwl from the conservative term
+//   vflag == 1 : accumulate the virial pair by pair
+//   vflag == 2 : write forces into update->f_pair and call
+//                virial_compute() once at the end
+// One Gaussian random number is drawn for every pair inside the cutoff.
+
+void PairDPD::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,vxtmp,vytmp,vztmp,delvx,delvy,delvz;
+  double rsq,r,rinv,dot,wd,randnum,fforce,factor_dpd,phi;
+  int *neighs;
+  double **f;
+
+  eng_vdwl = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  if (vflag == 2) f = update->f_pair;
+  else f = atom->f;
+  double **x = atom->x;
+  double **v = atom->v;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+  double dtinvsqrt = 1.0/sqrt(update->dt);
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    vxtmp = v[i][0];
+    vytmp = v[i][1];
+    vztmp = v[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      // a neighbor index >= nall encodes a special-bond weight:
+      // j/nall selects the weight, j % nall is the atom index
+
+      if (j < nall) factor_dpd = 1.0;
+      else {
+	factor_dpd = special_lj[j/nall];
+	j %= nall;
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r = sqrt(rsq);
+	if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
+	rinv = 1.0/r;
+	delvx = vxtmp - v[j][0];
+	delvy = vytmp - v[j][1];
+	delvz = vztmp - v[j][2];
+	dot = delx*delvx + dely*delvy + delz*delvz;
+	wd = 1.0 - r/cut[itype][jtype];
+	randnum = random->gaussian();
+
+	// conservative force = a0 * wd
+	// drag force = -gamma * wd^2 * (delx dot delv) / r
+	// random force = sigma * wd * rnd * dtinvsqrt;
+
+	fforce = a0[itype][jtype]*wd;
+	fforce -= gamma[itype][jtype]*wd*wd*dot*rinv;
+	fforce += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
+	fforce *= factor_dpd*rinv;	
+
+	f[i][0] += delx*fforce;
+	f[i][1] += dely*fforce;
+	f[i][2] += delz*fforce;
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= delx*fforce;
+	  f[j][1] -= dely*fforce;
+	  f[j][2] -= delz*fforce;
+	}
+
+	// energy uses only a0 and the cutoff;
+	// with newton_pair off and j not local, tally half of it
+
+	if (eflag) {
+	  phi = a0[itype][jtype] * r * (1.0 - 0.5*r/cut[itype][jtype]);
+	  if (newton_pair || j < nlocal) eng_vdwl += factor_dpd*phi;
+	  else eng_vdwl += 0.5*factor_dpd*phi;
+	}
+
+	// pairwise virial, halved in the same case as the energy
+
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] += delx*delx*fforce;
+	    virial[1] += dely*dely*fforce;
+	    virial[2] += delz*delz*fforce;
+	    virial[3] += delx*dely*fforce;
+	    virial[4] += delx*delz*fforce;
+	    virial[5] += dely*delz*fforce;
+	  } else {
+	    virial[0] += 0.5*delx*delx*fforce;
+	    virial[1] += 0.5*dely*dely*fforce;
+	    virial[2] += 0.5*delz*delz*fforce;
+	    virial[3] += 0.5*delx*dely*fforce;
+	    virial[4] += 0.5*delx*delz*fforce;
+	    virial[5] += 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays 
+------------------------------------------------------------------------- */
+
+void PairDPD::allocate()
+{
+  allocated = 1;
+
+  // tables are indexed 1..ntypes, so each dimension has ntypes+1 entries
+
+  const int dim = atom->ntypes + 1;
+
+  // only the j >= i half of setflag is cleared here
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  for (int itype = 1; itype < dim; itype++) {
+    for (int jtype = itype; jtype < dim; jtype++) {
+      setflag[itype][jtype] = 0;
+    }
+  }
+
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+
+  cut = memory->create_2d_double_array(dim,dim,"pair:cut");
+  a0 = memory->create_2d_double_array(dim,dim,"pair:a0");
+  gamma = memory->create_2d_double_array(dim,dim,"pair:gamma");
+  sigma = memory->create_2d_double_array(dim,dim,"pair:sigma");
+}
+
+/* ----------------------------------------------------------------------
+   global settings 
+------------------------------------------------------------------------- */
+
+// Parse the pair_style arguments.
+//   arg[0] = temperature
+//   arg[1] = global cutoff
+//   arg[2] = random number seed, must be in 1..900000000
+// Replaces any existing random number generator with one seeded by
+// seed + proc id.  If coefficient tables already exist, every type
+// pair whose coefficients were set explicitly has its cutoff reset
+// to the new global value.
+
+void PairDPD::settings(int narg, char **arg)
+{
+  if (narg != 3) error->all("Illegal pair_style command");
+
+  temperature = atof(arg[0]);
+  cut_global = atof(arg[1]);
+  seed = atoi(arg[2]);
+
+  // initialize Marsaglia RNG with processor-unique seed
+
+  if (seed <= 0 || seed > 900000000)
+    error->all("Illegal pair_style command");
+  if (random) delete random;
+  random = new RanMars(seed + comm->me);
+
+  // reset cutoffs that have been explicitly set
+  // start at j = i so that like-type pairs (i,i) are reset as well
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+	if (setflag[i][j]) cut[i][j] = cut_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairDPD::coeff(int narg, char **arg)
+{
+  // args: I J a0 gamma [cutoff]
+
+  if (narg < 4 || narg > 5) error->all("Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  const double a0_one = atof(arg[2]);
+  const double gamma_one = atof(arg[3]);
+
+  // the per-pair cutoff falls back to the global one when omitted
+
+  double cut_one;
+  if (narg == 5) cut_one = atof(arg[4]);
+  else cut_one = cut_global;
+
+  // store only pairs with jtype >= itype and count how many were set
+
+  int npairs = 0;
+  for (int itype = ilo; itype <= ihi; itype++) {
+    const int jfirst = (jlo > itype) ? jlo : itype;
+    for (int jtype = jfirst; jtype <= jhi; jtype++) {
+      a0[itype][jtype] = a0_one;
+      gamma[itype][jtype] = gamma_one;
+      cut[itype][jtype] = cut_one;
+      setflag[itype][jtype] = 1;
+      npairs++;
+    }
+  }
+
+  if (npairs == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairDPD::init_one(int i, int j)
+{
+  if (!setflag[i][j]) error->all("All pair coeffs are not set");
+
+  // noise amplitude derived from temperature and friction coefficient
+
+  const double sigma_ij = sqrt(2.0*temperature*gamma[i][j]);
+  const double cut_ij = cut[i][j];
+
+  sigma[i][j] = sigma_ij;
+
+  // mirror the i,j entries into j,i
+
+  sigma[j][i] = sigma_ij;
+  gamma[j][i] = gamma[i][j];
+  a0[j][i] = a0[i][j];
+  cut[j][i] = cut_ij;
+
+  return cut_ij;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairDPD::init_style()
+{
+  // atom style must be dpd
+  // otherwise compute() will not have ghost atom velocities
+
+  if (!atom->check_style("dpd"))
+    error->all("Must use atom style dpd with pair style dpd");
+
+  // with newton off, forces between atoms ij are computed twice
+  //   using different random numbers; only proc 0 issues the warning
+
+  if (force->newton_pair == 0) {
+    if (comm->me == 0)
+      error->warning(
+        "DPD potential needs newton pair on for momentum conservation");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairDPD::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  // for each pair with j >= i: the set flag, then a0, gamma, cut if set
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
+      if (!setflag[itype][jtype]) continue;
+      fwrite(&a0[itype][jtype],sizeof(double),1,fp);
+      fwrite(&gamma[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut[itype][jtype],sizeof(double),1,fp);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairDPD::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+
+  allocate();
+
+  // same record order as write_restart()
+  // proc 0 reads each value, then it is broadcast to all procs
+
+  const int ntypes = atom->ntypes;
+  const int reader = (comm->me == 0);
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      int *flag = &setflag[itype][jtype];
+      if (reader) fread(flag,sizeof(int),1,fp);
+      MPI_Bcast(flag,1,MPI_INT,0,world);
+      if (*flag == 0) continue;
+
+      if (reader) {
+        fread(&a0[itype][jtype],sizeof(double),1,fp);
+        fread(&gamma[itype][jtype],sizeof(double),1,fp);
+        fread(&cut[itype][jtype],sizeof(double),1,fp);
+      }
+      MPI_Bcast(&a0[itype][jtype],1,MPI_DOUBLE,0,world);
+      MPI_Bcast(&gamma[itype][jtype],1,MPI_DOUBLE,0,world);
+      MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairDPD::write_restart_settings(FILE *fp)
+{
+  // record layout: temperature, cut_global (doubles), seed, mix_flag (ints)
+
+  const size_t dsize = sizeof(double);
+  const size_t isize = sizeof(int);
+
+  fwrite(&temperature,dsize,1,fp);
+  fwrite(&cut_global,dsize,1,fp);
+  fwrite(&seed,isize,1,fp);
+  fwrite(&mix_flag,isize,1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairDPD::read_restart_settings(FILE *fp)
+{
+  const int me = comm->me;
+
+  // proc 0 reads the values in the order write_restart_settings() wrote them
+
+  if (me == 0) {
+    fread(&temperature,sizeof(double),1,fp);
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&seed,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+
+  // every proc receives the settings from proc 0
+
+  MPI_Bcast(&temperature,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&seed,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+
+  // rebuild the Marsaglia RNG with a processor-unique seed
+  // uses the same seed value that was stored in the restart file
+
+  delete random;
+  random = new RanMars(seed + me);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   force and (optionally) energy for a single i,j pair at squared
+   distance rsq; results are returned in one
+   only the a0 (conservative) term enters the returned force
+   factor_coul is not used by this style
+------------------------------------------------------------------------- */
+
+void PairDPD::single(int i, int j, int itype, int jtype, double rsq,
+                     double factor_coul, double factor_dpd, int eflag,
+                     One &one)
+{
+  double r = sqrt(rsq);
+
+  // coincident atoms: no force or energy, avoids dividing by r
+
+  if (r < EPSILON) {
+    one.fforce = 0.0;
+    if (eflag) one.eng_vdwl = one.eng_coul = 0.0;
+    return;
+  }
+
+  double rinv = 1.0/r;
+  double wd = 1.0 - r/cut[itype][jtype];
+
+  // NOTE(review): the drawn value is not used in this routine; the call
+  // is kept only so the sequence of random numbers seen by the rest of
+  // the run is unchanged
+
+  random->gaussian();
+
+  one.fforce = a0[itype][jtype]*wd * factor_dpd*rinv;
+
+  if (eflag) {
+    double phi = a0[itype][jtype] * r * (1.0 - 0.5*r/cut[itype][jtype]);
+    one.eng_vdwl = factor_dpd*phi;
+    one.eng_coul = 0.0;
+  }
+}
--- a/src/DPD/pair_dpd.h
+++ b/src/DPD/pair_dpd.h
@ -0,0 +1,47 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_DPD_H
+#define PAIR_DPD_H
+
+#include "pair.h"
+
+class RanMars;
+
+// Pair style registered under the name "dpd".
+// Holds per-type-pair parameter tables and a random number generator.
+
+class PairDPD : public Pair {
+ public:
+  PairDPD();
+  ~PairDPD();
+  void compute(int, int);
+  void settings(int, char **);        // args: temperature, global cutoff, seed
+  void coeff(int, char **);           // args: type ranges, a0, gamma, optional cutoff
+  double init_one(int, int);          // fills j,i entries from i,j; returns cut[i][j]
+  void init_style();                  // requires atom style dpd
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  void single(int, int, int, int, double, double, double, int, One &);
+
+ private:
+  double cut_global,temperature;      // values given to settings()
+  int seed;                           // seed given to settings(); offset by proc rank for the RNG
+  double **cut;                       // cutoff for each type pair
+  double **a0,**gamma;                // coefficients set by coeff()
+  double **sigma;                     // sqrt(2*temperature*gamma), set in init_one()
+  RanMars *random;                    // Marsaglia random number generator
+
+  void allocate();                    // creates the per-type-pair arrays
+};
+
+#endif
--- a/src/DPD/style_dpd.h
+++ b/src/DPD/style_dpd.h
@ -0,0 +1,28 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// Each section below is compiled only when the including file has
+// defined the corresponding macro.
+
+// header for the dpd atom style
+#ifdef AtomInclude
+#include "atom_dpd.h"
+#endif
+
+// associates atom style name "dpd" with class AtomDPD
+#ifdef AtomClass
+AtomStyle(dpd,AtomDPD)
+#endif
+
+// header for the dpd pair style
+#ifdef PairInclude
+#include "pair_dpd.h"
+#endif
+
+// associates pair style name "dpd" with class PairDPD
+#ifdef PairClass
+PairStyle(dpd,PairDPD)
+#endif
--- a/src/GRANULAR/Install.csh
+++ b/src/GRANULAR/Install.csh
@ -0,0 +1,58 @@
+# Install/unInstall package classes in LAMMPS
+
+# fix_shear_history.h must always be in src
+
+# usage: first argument 1 = install, 0 = uninstall
+# any other value matches neither branch and does nothing
+
+if ($1 == 1) then
+
+  # install: copy the package files into the parent directory
+
+  cp style_granular.h ..
+
+  cp atom_granular.cpp ..
+  cp fix_freeze.cpp ..
+  cp fix_gran_diag.cpp ..
+  cp fix_insert.cpp ..
+  cp fix_nve_gran.cpp ..
+  cp fix_shear_history.cpp ..
+  cp fix_wall_gran.cpp ..
+  cp pair_gran_hertzian.cpp ..
+  cp pair_gran_history.cpp ..
+  cp pair_gran_no_history.cpp ..
+
+  # fix_shear_history.h is deliberately not copied (see note at top)
+
+  cp atom_granular.h ..
+  cp fix_freeze.h ..
+  cp fix_gran_diag.h ..
+  cp fix_insert.h ..
+  cp fix_nve_gran.h ..
+#  cp fix_shear_history.h ..
+  cp fix_wall_gran.h ..
+  cp pair_gran_hertzian.h ..
+  cp pair_gran_history.h ..
+  cp pair_gran_no_history.h ..
+
+else if ($1 == 0) then
+
+  # uninstall: the style header is replaced by an empty file
+  # rather than just removed
+
+  rm ../style_granular.h
+  touch ../style_granular.h
+
+  rm ../atom_granular.cpp
+  rm ../fix_freeze.cpp
+  rm ../fix_gran_diag.cpp
+  rm ../fix_insert.cpp
+  rm ../fix_nve_gran.cpp
+  rm ../fix_shear_history.cpp
+  rm ../fix_wall_gran.cpp
+  rm ../pair_gran_hertzian.cpp
+  rm ../pair_gran_history.cpp
+  rm ../pair_gran_no_history.cpp
+
+  # fix_shear_history.h is deliberately left in place (see note at top)
+
+  rm ../atom_granular.h
+  rm ../fix_freeze.h
+  rm ../fix_gran_diag.h
+  rm ../fix_insert.h
+  rm ../fix_nve_gran.h
+#  rm ../fix_shear_history.h
+  rm ../fix_wall_gran.h
+  rm ../pair_gran_hertzian.h
+  rm ../pair_gran_history.h
+  rm ../pair_gran_no_history.h
+
+endif
--- a/src/GRANULAR/atom_granular.cpp
+++ b/src/GRANULAR/atom_granular.cpp
@ -0,0 +1,322 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "atom_granular.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+
+/* ---------------------------------------------------------------------- */
+
+// all construction work is done by the Atom base class
+
+AtomGranular::AtomGranular(int narg, char **arg)
+  : Atom(narg, arg)
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   copy all per-atom data of atom i into slot j
+------------------------------------------------------------------------- */
+
+void AtomGranular::copy(int i, int j)
+{
+  // integer properties
+
+  tag[j] = tag[i];
+  type[j] = type[i];
+  mask[j] = mask[i];
+  image[j] = image[i];
+
+  // 3-vector properties
+
+  for (int d = 0; d < 3; d++) {
+    x[j][d] = x[i][d];
+    v[j][d] = v[i][d];
+    phix[j][d] = phix[i][d];
+    phiv[j][d] = phiv[i][d];
+  }
+
+  // scalar granular properties
+
+  radius[j] = radius[i];
+  density[j] = density[i];
+  rmass[j] = rmass[i];
+
+  // let each fix listed in extra_grow copy its own per-atom arrays
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    Fix *owner = modify->fix[extra_grow[iextra]];
+    owner->copy_arrays(i,j);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   pack x, v, phiv of the n atoms in list into buf (9 values per atom)
+   pbc_flags[0] != 0 means positions are shifted by
+   pbc_flags[1..3] box lengths in x,y,z
+------------------------------------------------------------------------- */
+
+void AtomGranular::pack_comm(int n, int *list, double *buf, int *pbc_flags)
+{
+  double *out = buf;
+
+  if (pbc_flags[0] == 0) {
+
+    // no periodic shift: positions are packed unchanged
+
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      for (int d = 0; d < 3; d++) *out++ = x[iatom][d];
+      for (int d = 0; d < 3; d++) *out++ = v[iatom][d];
+      for (int d = 0; d < 3; d++) *out++ = phiv[iatom][d];
+    }
+    return;
+  }
+
+  // periodic shift, identical for every atom in the list
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+
+  double shift[3];
+  shift[0] = pbc_flags[1]*xprd;
+  shift[1] = pbc_flags[2]*yprd;
+  shift[2] = pbc_flags[3]*zprd;
+
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    for (int d = 0; d < 3; d++) *out++ = x[iatom][d] + shift[d];
+    for (int d = 0; d < 3; d++) *out++ = v[iatom][d];
+    for (int d = 0; d < 3; d++) *out++ = phiv[iatom][d];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   unpack x, v, phiv for n atoms starting at index first
+   value order matches pack_comm()
+------------------------------------------------------------------------- */
+
+void AtomGranular::unpack_comm(int n, int first, double *buf)
+{
+  const double *in = buf;
+  const int stop = first + n;
+
+  for (int iatom = first; iatom < stop; iatom++) {
+    for (int d = 0; d < 3; d++) x[iatom][d] = *in++;
+    for (int d = 0; d < 3; d++) v[iatom][d] = *in++;
+    for (int d = 0; d < 3; d++) phiv[iatom][d] = *in++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   pack f and phia for n atoms starting at index first (6 values per atom)
+------------------------------------------------------------------------- */
+
+void AtomGranular::pack_reverse(int n, int first, double *buf)
+{
+  double *out = buf;
+  const int stop = first + n;
+
+  for (int iatom = first; iatom < stop; iatom++) {
+    for (int d = 0; d < 3; d++) *out++ = f[iatom][d];
+    for (int d = 0; d < 3; d++) *out++ = phia[iatom][d];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   add received f and phia contributions to the n atoms in list
+   value order matches pack_reverse()
+------------------------------------------------------------------------- */
+
+void AtomGranular::unpack_reverse(int n, int *list, double *buf)
+{
+  const double *in = buf;
+
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    for (int d = 0; d < 3; d++) f[iatom][d] += *in++;
+    for (int d = 0; d < 3; d++) phia[iatom][d] += *in++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   pack border data of the n atoms in list into buf (14 values per atom):
+   x, tag, type, mask, v, phiv, radius, rmass
+   pbc_flags[0] != 0 means positions are shifted by
+   pbc_flags[1..3] box lengths in x,y,z
+------------------------------------------------------------------------- */
+
+void AtomGranular::pack_border(int n, int *list, double *buf, int *pbc_flags)
+{
+  double *out = buf;
+
+  if (pbc_flags[0] == 0) {
+
+    // no periodic shift: positions are packed unchanged
+
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      for (int d = 0; d < 3; d++) *out++ = x[iatom][d];
+      *out++ = tag[iatom];
+      *out++ = type[iatom];
+      *out++ = mask[iatom];
+      for (int d = 0; d < 3; d++) *out++ = v[iatom][d];
+      for (int d = 0; d < 3; d++) *out++ = phiv[iatom][d];
+      *out++ = radius[iatom];
+      *out++ = rmass[iatom];
+    }
+    return;
+  }
+
+  // periodic shift, identical for every atom in the list
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+
+  double shift[3];
+  shift[0] = pbc_flags[1]*xprd;
+  shift[1] = pbc_flags[2]*yprd;
+  shift[2] = pbc_flags[3]*zprd;
+
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    for (int d = 0; d < 3; d++) *out++ = x[iatom][d] + shift[d];
+    *out++ = tag[iatom];
+    *out++ = type[iatom];
+    *out++ = mask[iatom];
+    for (int d = 0; d < 3; d++) *out++ = v[iatom][d];
+    for (int d = 0; d < 3; d++) *out++ = phiv[iatom][d];
+    *out++ = radius[iatom];
+    *out++ = rmass[iatom];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   unpack border data for n atoms starting at index first
+   value order matches pack_border()
+------------------------------------------------------------------------- */
+
+void AtomGranular::unpack_border(int n, int first, double *buf)
+{
+  const double *in = buf;
+  const int stop = first + n;
+
+  for (int iatom = first; iatom < stop; iatom++) {
+
+    // enlarge the per-atom arrays once the current capacity is reached
+    // arrays are accessed through the members afterwards, not cached
+
+    if (iatom == nmax) grow(0);
+
+    for (int d = 0; d < 3; d++) x[iatom][d] = *in++;
+
+    // integer fields travel as doubles
+
+    tag[iatom] = static_cast<int> (*in++);
+    type[iatom] = static_cast<int> (*in++);
+    mask[iatom] = static_cast<int> (*in++);
+
+    for (int d = 0; d < 3; d++) v[iatom][d] = *in++;
+    for (int d = 0; d < 3; d++) phiv[iatom][d] = *in++;
+    radius[iatom] = *in++;
+    rmass[iatom] = *in++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack all atom quantities for shipping to another proc
+   buf[0] holds the number of values packed; xyz must be the next
+   3 values, so that comm::exchange can test on them
+------------------------------------------------------------------------- */
+
+int AtomGranular::pack_exchange(int i, double *buf)
+{
+  // slot 0 is reserved for the total length, filled in at the end
+
+  int m = 1;
+
+  for (int d = 0; d < 3; d++) buf[m++] = x[i][d];
+  for (int d = 0; d < 3; d++) buf[m++] = v[i][d];
+  for (int d = 0; d < 3; d++) buf[m++] = phix[i][d];
+  for (int d = 0; d < 3; d++) buf[m++] = phiv[i][d];
+
+  buf[m++] = radius[i];
+  buf[m++] = density[i];
+  buf[m++] = rmass[i];
+
+  // integer fields are stored as doubles
+
+  buf[m++] = tag[i];
+  buf[m++] = type[i];
+  buf[m++] = mask[i];
+  buf[m++] = image[i];
+
+  // each fix listed in extra_grow appends its own per-atom data
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    Fix *owner = modify->fix[extra_grow[iextra]];
+    m += owner->pack_exchange(i,&buf[m]);
+  }
+
+  buf[0] = m;
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   unpack one atom packed by pack_exchange() into slot nlocal
+   increments nlocal and returns the number of values consumed
+------------------------------------------------------------------------- */
+
+int AtomGranular::unpack_exchange(double *buf)
+{
+  if (nlocal == nmax) grow(0);
+
+  const int inew = nlocal;
+
+  // slot 0 holds the packed length and is skipped
+
+  int m = 1;
+
+  for (int d = 0; d < 3; d++) x[inew][d] = buf[m++];
+  for (int d = 0; d < 3; d++) v[inew][d] = buf[m++];
+  for (int d = 0; d < 3; d++) phix[inew][d] = buf[m++];
+  for (int d = 0; d < 3; d++) phiv[inew][d] = buf[m++];
+
+  radius[inew] = buf[m++];
+  density[inew] = buf[m++];
+  rmass[inew] = buf[m++];
+
+  // integer fields were stored as doubles
+
+  tag[inew] = static_cast<int> (buf[m++]);
+  type[inew] = static_cast<int> (buf[m++]);
+  mask[inew] = static_cast<int> (buf[m++]);
+  image[inew] = static_cast<int> (buf[m++]);
+
+  // each fix listed in extra_grow reads back its own per-atom data
+
+  for (int iextra = 0; iextra < nextra_grow; iextra++) {
+    Fix *owner = modify->fix[extra_grow[iextra]];
+    m += owner->unpack_exchange(inew,&buf[m]);
+  }
+
+  nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack vels & phis specific to granular data file
+------------------------------------------------------------------------- */
+
+void AtomGranular::unpack_vels(int n, char *buf)
+{
+  // buf holds up to n newline-separated lines of the form
+  //   tag vx vy vz phivx phivy phivz
+  // buf is modified in place: each newline is overwritten with '\0'
+
+  int m,tagtmp,nfields;
+  double vxtmp,vytmp,vztmp,phivxtmp,phivytmp,phivztmp;
+  char *next;
+
+  for (int i = 0; i < n && buf != NULL; i++) {
+
+    // a final line without a trailing newline is still parsed
+
+    next = strchr(buf,'\n');
+    if (next) *next = '\0';
+
+    nfields = sscanf(buf,"%d %lg %lg %lg %lg %lg %lg",
+                     &tagtmp,&vxtmp,&vytmp,&vztmp,
+                     &phivxtmp,&phivytmp,&phivztmp);
+
+    // apply the line only if all 7 fields were read, so stale or
+    // uninitialized temporaries are never stored
+    // atoms whose tag does not map to a valid index are skipped
+
+    if (nfields == 7 && (m = map(tagtmp)) >= 0) {
+      v[m][0] = vxtmp;
+      v[m][1] = vytmp;
+      v[m][2] = vztmp;
+      phiv[m][0] = phivxtmp;
+      phiv[m][1] = phivytmp;
+      phiv[m][2] = phivztmp;
+    }
+
+    buf = next ? next + 1 : NULL;
+  }
+}
--- a/src/GRANULAR/atom_granular.h
+++ b/src/GRANULAR/atom_granular.h
@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef ATOM_GRANULAR_H
+#define ATOM_GRANULAR_H
+
+#include "atom.h"
+
+// Atom style registered for granular particles.
+// Provides the copy and communication pack/unpack routines for
+// per-atom data including phix, phiv, phia, radius, density, rmass.
+
+class AtomGranular : public Atom {
+ public:
+  AtomGranular(int, char **);
+  ~AtomGranular() {}
+  void copy(int, int);                          // copy atom i data into slot j
+  void pack_comm(int, int *, double *, int *);  // x, v, phiv
+  void unpack_comm(int, int, double *);
+  void pack_reverse(int, int, double *);        // f, phia
+  void unpack_reverse(int, int *, double *);    // adds to f, phia
+  void pack_border(int, int *, double *, int *);
+  void unpack_border(int, int, double *);
+  int pack_exchange(int, double *);             // returns # of values packed
+  int unpack_exchange(double *);                // returns # of values read
+
+  void unpack_vels(int, char *);                // parses text lines: tag, v, phiv
+};
+
+#endif
--- a/src/GRANULAR/fix_freeze.cpp
+++ b/src/GRANULAR/fix_freeze.cpp
@ -0,0 +1,77 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "fix_freeze.h"
+#include "atom.h"
+#include "modify.h"
+#include "comm.h"
+#include "error.h"
+
+/* ---------------------------------------------------------------------- */
+
+FixFreeze::FixFreeze(int narg, char **arg) : Fix(narg, arg)
+{
+  // the command takes exactly 3 arguments
+
+  if (narg != 3) error->all("Illegal fix freeze command");
+
+  // only usable with the granular atom style
+
+  const int granular = atom->check_style("granular");
+  if (!granular)
+    error->all("Must use fix freeze with atom style granular");
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixFreeze::setmask()
+{
+  // this fix only acts in post_force()
+
+  const int mask = 0 | POST_FORCE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixFreeze::init()
+{
+  // count fixes of style "freeze"; more than one is an error
+
+  int nfreeze = 0;
+  for (int ifix = 0; ifix < modify->nfix; ifix++) {
+    if (strcmp(modify->fix[ifix]->style,"freeze") != 0) continue;
+    nfreeze++;
+  }
+
+  if (nfreeze > 1) error->all("More than one freeze fix");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixFreeze::setup()
+{
+  // apply the freeze once at setup, passing 1 as the vflag argument
+
+  const int vflag = 1;
+  post_force(vflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   zero f and phia of every local atom in the fix group
+   vflag is not used
+------------------------------------------------------------------------- */
+
+void FixFreeze::post_force(int vflag)
+{
+  double **fatom = atom->f;
+  double **phiacc = atom->phia;
+  int *groupmask = atom->mask;
+  const int natoms = atom->nlocal;
+
+  for (int iatom = 0; iatom < natoms; iatom++) {
+    if (!(groupmask[iatom] & groupbit)) continue;
+    for (int d = 0; d < 3; d++) {
+      fatom[iatom][d] = 0.0;
+      phiacc[iatom][d] = 0.0;
+    }
+  }
+}
--- a/src/GRANULAR/fix_freeze.h
+++ b/src/GRANULAR/fix_freeze.h
@ -0,0 +1,29 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FIX_FREEZE_H
+#define FIX_FREEZE_H
+
+#include "fix.h"
+
+// Fix "freeze": zeroes f and phia on the atoms of its group after
+// forces are computed.  Requires atom style granular; at most one
+// such fix may be defined.
+
+class FixFreeze : public Fix {
+ public:
+  FixFreeze(int, char **);
+  ~FixFreeze() {}
+  int setmask();            // requests the POST_FORCE callback
+  void init();              // errors out if more than one freeze fix exists
+  void setup();             // calls post_force(1)
+  void post_force(int);     // zeroes f and phia for atoms in the group
+};
+
+#endif
--- a/src/GRANULAR/fix_gran_diag.cpp
+++ b/src/GRANULAR/fix_gran_diag.cpp
@ -0,0 +1,969 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "string.h"
+#include "fix_gran_diag.h"
+#include "update.h"
+#include "force.h"
+#include "pair_gran_no_history.h"
+#include "pair_gran_history.h"
+#include "pair_gran_hertzian.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+
+// two-argument min/max
+// being macros, the selected argument expression is evaluated twice
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+// values stored in pairstyle to record which granular pair style is active
+
+#define NO_HISTORY 1
+#define HISTORY    2
+#define HERTZIAN   3
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   arguments used here: arg[3] = output interval (nevery),
+   arg[4] = output file prefix, arg[5] = layer thickness (stepz)
+   proc 0 opens <prefix>.den, <prefix>.vel and <prefix>.str for writing
+------------------------------------------------------------------------- */
+
+FixGranDiag::FixGranDiag(int narg, char **arg) : Fix(narg, arg)
+{
+  if (narg != 6) error->all("Illegal fix gran/diag command");
+  nevery = atoi(arg[3]);
+  if (nevery <= 0) error->all("Illegal fix gran/diag command");
+  first = 1;
+
+  if (atom->check_style("granular") == 0)
+    error->all("Must use fix gran/diag with atom style granular");
+
+  // layer thickness must be positive
+  // it is inverted below and used to bin atoms into layers
+  // checked on all procs before any output file is created
+
+  stepz = atof(arg[5]);
+  if (stepz <= 0.0) error->all("Illegal fix gran/diag command");
+  stepinv = 1.0/stepz;
+  PI = 4.0*atan(1.0);
+
+  maxlayers = 0;
+
+  MPI_Comm_rank(world,&me);
+  if (me == 0) {
+    const char *suffix[3] = {"den","vel","str"};
+    FILE **fpp[3] = {&fpden,&fpvel,&fpstr};
+
+    // buffers are sized from the prefix so long names cannot overflow
+    // file: prefix + '.' + 3-char suffix + terminator
+    // str: file name plus room for the message text
+
+    int nfile = strlen(arg[4]) + 5;
+    char *file = new char[nfile];
+    char *str = new char[nfile + 64];
+
+    for (int k = 0; k < 3; k++) {
+      sprintf(file,"%s.%s",arg[4],suffix[k]);
+      *fpp[k] = fopen(file,"w");
+      if (*fpp[k] == NULL) {
+        sprintf(str,"Cannot open fix gran/diag file %s",file);
+        error->one(str);
+      }
+    }
+
+    delete [] str;
+    delete [] file;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixGranDiag::~FixGranDiag()
+{
+  deallocate();
+
+  // only proc 0 opened the output files
+
+  if (me != 0) return;
+
+  fclose(fpden);
+  fclose(fpvel);
+  fclose(fpstr);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixGranDiag::setmask()
+{
+  // this fix only acts in end_of_step()
+
+  const int mask = 0 | END_OF_STEP;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGranDiag::init()
+{
+  dt = update->dt;
+
+  // copy constants from whichever granular pair style is active
+  // styles are tried in the order no_history, history, hertzian
+  // NOTE(review): freeze_group_bit is read through a PairGranNoHistory
+  // cast in every branch, as in the original code - confirm intended
+
+  Pair *anypair = force->pair_match("gran/no_history");
+  if (anypair != NULL) {
+    PairGranNoHistory *pair = (PairGranNoHistory *) anypair;
+    pairstyle = NO_HISTORY;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+    xmu = pair->xmu;
+    gamman_dl = pair->gamman_dl;
+    gammas_dl = pair->gammas_dl;
+    freeze_group_bit = pair->freeze_group_bit;
+    return;
+  }
+
+  anypair = force->pair_match("gran/history");
+  if (anypair != NULL) {
+    PairGranHistory *pair = (PairGranHistory *) anypair;
+    pairstyle = HISTORY;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+    xmu = pair->xmu;
+    gamman_dl = pair->gamman_dl;
+    gammas_dl = pair->gammas_dl;
+    freeze_group_bit = ((PairGranNoHistory *) anypair)->freeze_group_bit;
+    return;
+  }
+
+  anypair = force->pair_match("gran/hertzian");
+  if (anypair != NULL) {
+    PairGranHertzian *pair = (PairGranHertzian *) anypair;
+    pairstyle = HERTZIAN;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+    xmu = pair->xmu;
+    gamman_dl = pair->gamman_dl;
+    gammas_dl = pair->gammas_dl;
+    freeze_group_bit = ((PairGranNoHistory *) anypair)->freeze_group_bit;
+    return;
+  }
+
+  error->all("Must use fix gran/diag with granular pair style");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGranDiag::setup()
+{
+  // produce output once, the first time setup is called
+
+  if (first) {
+    end_of_step();
+    first = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGranDiag::end_of_step()
+{
+  int i,m;
+
+  // set bottom of box for binning purposes
+
+  boxzlo = domain->boxzlo;
+
+  // update ghost atom info
+  // else ghost x/v is out-of-date at end of timestep
+
+  comm->communicate();
+
+  // insure accumulator arrays are correct length
+  // add 10 for buffer
+
+  nlayers = static_cast<int> (domain->zprd / stepz) + 10;
+  if (nlayers > maxlayers) {
+    deallocate();
+    maxlayers = nlayers;
+    allocate();
+  }
+
+  // zero all accumulators
+
+  for (i = 0; i < nlayers; i++) {
+    numdens[i] = 0;
+    dendens[i] = 0.0;
+    velx[i] = vely[i] = velz[i] = 0.0;
+    velxx[i] = velyy[i] = velzz[i] = velxy[i] = velxz[i] = velyz[i] = 0.0;
+    sigxx[i] = sigyy[i] = sigzz[i] = sigxy[i] = sigxz[i] = sigyz[i] = 0.0;
+  }
+
+  // density/velocity accumulation by atom
+  // assign to layer based on atom distance from z bottom of domain
+
+  int overflow = 0;
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++)
+    if (mask[i] & groupbit) {
+      m = static_cast<int> ((x[i][2]-boxzlo) * stepinv);
+      if (m >= 0 && m < nlayers) {
+	numdens[m]++;
+	dendens[m] += rmass[i];
+	velx[m] += v[i][0];
+	vely[m] += v[i][1];
+	velz[m] += v[i][2];
+	velxx[m] += v[i][0]*v[i][0];
+	velyy[m] += v[i][1]*v[i][1];
+	velzz[m] += v[i][2]*v[i][2];
+	velxy[m] += v[i][0]*v[i][1];
+	velxz[m] += v[i][0]*v[i][2];
+	velyz[m] += v[i][1]*v[i][2];
+      } else overflow++;
+    }
+
+  // m = largest layer # with any counts
+  // nmax = # of layers up to m
+      
+  for (m = nlayers-1; m >= 0; m--) if (numdens[m]) break;
+  int nmax = m + 1;
+
+  int tmp = nmax;
+  MPI_Allreduce(&tmp,&nmax,1,MPI_INT,MPI_MAX,world);
+
+  // overflow = total # of atoms out-of-bounds of layer arrays
+  
+  tmp = overflow;
+  MPI_Allreduce(&tmp,&overflow,1,MPI_INT,MPI_SUM,world);
+
+  // sum contributions across procs
+
+  int *isum = new int[nmax];
+  double *dsum = new double[nmax];
+  
+  MPI_Allreduce(numdens,isum,nmax,MPI_INT,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) numdens[i] = isum[i];
+
+  MPI_Allreduce(dendens,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) dendens[i] = dsum[i];
+  
+  MPI_Allreduce(velx,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velx[i] = dsum[i];
+  MPI_Allreduce(vely,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) vely[i] = dsum[i];
+  MPI_Allreduce(velz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velz[i] = dsum[i];
+  
+  MPI_Allreduce(velxx,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velxx[i] = dsum[i];
+  MPI_Allreduce(velyy,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velyy[i] = dsum[i];
+  MPI_Allreduce(velzz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velzz[i] = dsum[i];
+  MPI_Allreduce(velxy,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velxy[i] = dsum[i];
+  MPI_Allreduce(velxz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velxz[i] = dsum[i];
+  MPI_Allreduce(velyz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) velyz[i] = dsum[i];
+
+  // compute contribution to stress by every atom pair
+
+  if (pairstyle == NO_HISTORY) stress_no_history();
+  else if (pairstyle == HISTORY) stress_history();
+  else if (pairstyle == HERTZIAN) stress_hertzian();
+
+  // sum contributions across procs
+
+  MPI_Allreduce(sigxx,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigxx[i] = dsum[i];
+  MPI_Allreduce(sigyy,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigyy[i] = dsum[i];
+  MPI_Allreduce(sigzz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigzz[i] = dsum[i];
+
+  MPI_Allreduce(sigxy,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigxy[i] = dsum[i];
+  MPI_Allreduce(sigxz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigxz[i] = dsum[i];
+  MPI_Allreduce(sigyz,dsum,nmax,MPI_DOUBLE,MPI_SUM,world);
+  for (i = 0; i < nmax; i++) sigyz[i] = dsum[i];
+
+  delete [] isum;
+  delete [] dsum;
+
+  // density/velocity/stress by layer
+
+  double velxxd1,velyyd1,velzzd1,velxyd1,velxzd1,velyzd1;
+  double volxy = domain->xprd * domain->yprd;
+
+  for (m = 0; m < nmax; m++) {
+    if (numdens[m] == 0) numdens[m] = 1;
+    dendens[m] = stepinv*dendens[m]/volxy;
+
+    velx11[m] = velx[m]/numdens[m];
+    vely11[m] = vely[m]/numdens[m];
+    velz11[m] = velz[m]/numdens[m];
+    velxx11[m] = velxx[m]/numdens[m];
+    velyy11[m] = velyy[m]/numdens[m];
+    velzz11[m] = velzz[m]/numdens[m];
+    velxy11[m] = velxy[m]/numdens[m];
+    velxz11[m] = velxz[m]/numdens[m];
+    velyz11[m] = velyz[m]/numdens[m];
+
+    velxxd1 = velxx11[m] - velx11[m]*velx11[m];
+    velyyd1 = velyy11[m] - vely11[m]*vely11[m];
+    velzzd1 = velzz11[m] - velz11[m]*velz11[m];
+    velxyd1 = velxy11[m] - velx11[m]*vely11[m];
+    velxzd1 = velxz11[m] - velx11[m]*velz11[m];
+    velyzd1 = velyz11[m] - vely11[m]*velz11[m];
+
+    velfxx[m] = velxxd1 * dendens[m];
+    velfyy[m] = velyyd1 * dendens[m];
+    velfzz[m] = velzzd1 * dendens[m];
+    velfxy[m] = velxyd1 * dendens[m];
+    velfxz[m] = velxzd1 * dendens[m];
+    velfyz[m] = velyzd1 * dendens[m];
+
+    sigx2[m] = sigxx[m]/(2.0*volxy*stepz) + velxxd1*dendens[m];
+    sigy2[m] = sigyy[m]/(2.0*volxy*stepz) + velyyd1*dendens[m];
+    sigz2[m] = sigzz[m]/(2.0*volxy*stepz) + velzzd1*dendens[m];
+    sigxy2[m] = sigxy[m]/(2.0*volxy*stepz) + velxyd1*dendens[m];
+    sigxz2[m] = sigxz[m]/(2.0*volxy*stepz) + velxzd1*dendens[m];
+    sigyz2[m] = sigyz[m]/(2.0*volxy*stepz) + velyzd1*dendens[m];
+  }
+
+  // write out density profile
+
+  if (me == 0) {
+    fprintf(fpden,"ITEM: TIMESTEP\n");
+    fprintf(fpden,"%d\n",update->ntimestep);
+    fprintf(fpden,"ITEM: NUMBER OF LAYERS / OVERFLOWS\n");
+    fprintf(fpden,"%d %d\n",nmax,overflow);
+    fprintf(fpden,"ITEM: DENSITY BY LAYER\n");
+    for (m = 0; m < nmax; m++)
+      fprintf(fpden,"%d %g %g\n",m+1,(m+1)*stepz+boxzlo,dendens[m]*PI/6.0);
+  }
+
+  // write out velocity profile
+
+  if (me == 0) {
+    fprintf(fpvel,"ITEM: TIMESTEP\n");
+    fprintf(fpvel,"%d\n",update->ntimestep);
+    fprintf(fpvel,"ITEM: NUMBER OF LAYERS / OVERFLOWS\n");
+    fprintf(fpvel,"%d %d\n",nmax,overflow);
+    fprintf(fpvel,"ITEM: VELOCITY BY LAYER\n");
+    for (m = 0; m < nmax; m++)
+      fprintf(fpvel,"%d %g %g %g %g %g %g %g %g %g %g\n",
+	      m+1,(m+1)*stepz+boxzlo,
+	      velx11[m],vely11[m],velz11[m],
+	      velxx11[m],velyy11[m],velzz11[m],
+	      velxy11[m],velxz11[m],velyz11[m]);
+  }
+
+  // write out stress profile
+
+  if (me == 0) {
+    fprintf(fpstr,"ITEM: TIMESTEP\n");
+    fprintf(fpstr,"%d\n",update->ntimestep);
+    fprintf(fpstr,"ITEM: NUMBER OF LAYERS / OVERFLOWS\n");
+    fprintf(fpstr,"%d %d\n",nmax,overflow);
+    fprintf(fpstr,"ITEM: STRESS BY LAYER\n");
+    for (m = 0; m < nmax; m++)
+      fprintf(fpstr,"%d %g %g %g %g %g %g %g %g %g %g %g %g %g\n",
+	      m+1,(m+1)*stepz+boxzlo,
+              sigx2[m],sigy2[m],sigz2[m],
+              sigxy2[m],sigxz2[m],sigyz2[m],
+              velfxx[m],velfyy[m],velfzz[m],
+              velfxy[m],velfxz[m],velfyz[m]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate every per-layer array with maxlayers entries
+------------------------------------------------------------------------- */
+
+void FixGranDiag::allocate()
+{
+  const int nslots = maxlayers;
+
+  // per-layer count (divisor for the layer averages) and density accumulator
+
+  numdens = new int[nslots];
+  dendens = new double[nslots];
+
+  // per-layer velocity sums: first moments, then second moments
+
+  velx = new double[nslots];  vely = new double[nslots];  velz = new double[nslots];
+  velxx = new double[nslots]; velyy = new double[nslots]; velzz = new double[nslots];
+  velxy = new double[nslots]; velxz = new double[nslots]; velyz = new double[nslots];
+
+  // the same moments after division by the per-layer count
+
+  velx11 = new double[nslots];   vely11 = new double[nslots];   velz11 = new double[nslots];
+  velxx11 = new double[nslots];  velyy11 = new double[nslots];  velzz11 = new double[nslots];
+  velxy11 = new double[nslots];  velxz11 = new double[nslots];  velyz11 = new double[nslots];
+
+  // pair stress sums filled in by the stress_*() routines
+
+  sigxx = new double[nslots];  sigyy = new double[nslots];  sigzz = new double[nslots];
+  sigxy = new double[nslots];  sigxz = new double[nslots];  sigyz = new double[nslots];
+
+  // total stress by layer = scaled pair stress + velocity fluctuation term
+
+  sigx2 = new double[nslots];   sigy2 = new double[nslots];   sigz2 = new double[nslots];
+  sigxy2 = new double[nslots];  sigxz2 = new double[nslots];  sigyz2 = new double[nslots];
+
+  // velocity fluctuation term by layer
+
+  velfxx = new double[nslots];  velfyy = new double[nslots];  velfzz = new double[nslots];
+  velfxy = new double[nslots];  velfxz = new double[nslots];  velfyz = new double[nslots];
+}
+
+/* ----------------------------------------------------------------------
+   free every per-layer array created by allocate()
+   does nothing when maxlayers is 0
+------------------------------------------------------------------------- */
+
+void FixGranDiag::deallocate()
+{
+  if (maxlayers == 0) return;
+
+  // count and density
+
+  delete [] numdens;
+  delete [] dendens;
+
+  // velocity sums
+
+  delete [] velx;
+  delete [] vely;
+  delete [] velz;
+  delete [] velxx;
+  delete [] velyy;
+  delete [] velzz;
+  delete [] velxy;
+  delete [] velxz;
+  delete [] velyz;
+
+  // velocity averages
+
+  delete [] velx11;
+  delete [] vely11;
+  delete [] velz11;
+  delete [] velxx11;
+  delete [] velyy11;
+  delete [] velzz11;
+  delete [] velxy11;
+  delete [] velxz11;
+  delete [] velyz11;
+
+  // pair stress sums
+
+  delete [] sigxx;
+  delete [] sigyy;
+  delete [] sigzz;
+  delete [] sigxy;
+  delete [] sigxz;
+  delete [] sigyz;
+
+  // total stress by layer
+
+  delete [] sigx2;
+  delete [] sigy2;
+  delete [] sigz2;
+  delete [] sigxy2;
+  delete [] sigxz2;
+  delete [] sigyz2;
+
+  // velocity fluctuation term
+
+  delete [] velfxx;
+  delete [] velfyy;
+  delete [] velfzz;
+  delete [] velfxy;
+  delete [] velfxz;
+  delete [] velfyz;
+}
+
+/* ----------------------------------------------------------------------
+   accumulate pair stress by z-layer, friction without shear history
+   for every overlapping neighbor pair with at least one atom in the
+     fix group, compute the normal + friction force and add the
+     (separation * force) products to the sig arrays of the layers
+     that hold atom i and atom j
+------------------------------------------------------------------------- */
+
+void FixGranDiag::stress_no_history()
+{
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+  const int newton_pair = force->newton_pair;
+
+  // visit each owned atom i and every entry j of its neighbor list
+
+  for (int i = 0; i < nlocal; i++) {
+    const double xi = x[i][0];
+    const double yi = x[i][1];
+    const double zi = x[i][2];
+    const double radi = radius[i];
+    const int *partners = neighbor->firstneigh[i];
+    const int npartners = neighbor->numneigh[i];
+
+    for (int k = 0; k < npartners; k++) {
+      const int j = partners[k];
+
+      // at least one atom of the pair must belong to the fix group
+
+      if (!((mask[i] & groupbit) || (mask[j] & groupbit))) continue;
+
+      const double delx = xi - x[j][0];
+      const double dely = yi - x[j][1];
+      const double delz = zi - x[j][2];
+      const double rsq = delx*delx + dely*dely + delz*delz;
+      const double radj = radius[j];
+      const double radsum = radi + radj;
+
+      // no contact unless the separation is below the sum of the radii
+
+      if (!(rsq < radsum*radsum)) continue;
+
+      const double r = sqrt(rsq);
+
+      // relative translational velocity, scaled by dt
+
+      const double vr1 = (v[i][0] - v[j][0]) * dt;
+      const double vr2 = (v[i][1] - v[j][1]) * dt;
+      const double vr3 = (v[i][2] - v[j][2]) * dt;
+
+      // normal component = projection onto the separation vector
+
+      const double vnnr = vr1*delx + vr2*dely + vr3*delz;
+      const double vn1 = delx*vnnr / rsq;
+      const double vn2 = dely*vnnr / rsq;
+      const double vn3 = delz*vnnr / rsq;
+
+      // tangential component = what is left over
+
+      const double vt1 = vr1 - vn1;
+      const double vt2 = vr2 - vn2;
+      const double vt3 = vr3 - vn3;
+
+      // relative rotational velocity, scaled by dt/r
+
+      const double dt_over_r = dt/r;
+      const double wr1 = (radi*phiv[i][0] + radj*phiv[j][0]) * dt_over_r;
+      const double wr2 = (radi*phiv[i][1] + radj*phiv[j][1]) * dt_over_r;
+      const double wr3 = (radi*phiv[i][2] + radj*phiv[j][2]) * dt_over_r;
+
+      // effective mass of the pair
+      // an atom in the freeze group leaves its partner's mass as xmeff
+
+      double xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+      if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+      if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+
+      // normal force coefficient: repulsion minus damping
+      // this definition of damp includes the extra 1/r term
+
+      const double damp = xmeff*gamman_dl*vnnr/rsq;
+      const double ccel = xkk*(radsum-r)/r - damp;
+
+      // tangential relative velocity including the rotational part
+
+      const double vtr1 = vt1 - (delz*wr2-dely*wr3);
+      const double vtr2 = vt2 - (delx*wr3-delz*wr1);
+      const double vtr3 = vt3 - (dely*wr1-delx*wr2);
+      const double vrel = sqrt(vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3);
+
+      // friction magnitude is the smaller of fn and fs
+      // ft is that magnitude per unit tangential velocity
+
+      const double fn = xmu * fabs(ccel*r);
+      const double fs = xmeff*gammas_dl*vrel;
+      const double ft = (vrel != 0.0) ? MIN(fn,fs) / vrel : 0.0;
+
+      // shear friction force opposes the tangential velocity
+
+      const double fs1 = -ft*vtr1;
+      const double fs2 = -ft*vtr2;
+      const double fs3 = -ft*vtr3;
+
+      // total pair force
+
+      const double ccelx = delx*ccel + fs1;
+      const double ccely = dely*ccel + fs2;
+      const double ccelz = delz*ccel + fs3;
+
+      // stress contribution of the pair
+
+      const double sxx = delx*ccelx;
+      const double syy = dely*ccely;
+      const double szz = delz*ccelz;
+      const double sxy = delx*ccely;
+      const double sxz = delx*ccelz;
+      const double syz = dely*ccelz;
+
+      // atom i always contributes to its z-layer
+      // atom j contributes if newton_pair is on or if owned by this proc
+
+      const double zatom[2] = {x[i][2], x[j][2]};
+      const int ncontrib = (newton_pair || j < nlocal) ? 2 : 1;
+
+      for (int c = 0; c < ncontrib; c++) {
+        const int m = static_cast<int> ((zatom[c]-boxzlo) * stepinv);
+        if (m < 0 || m >= nlayers) continue;
+        sigxx[m] += sxx;
+        sigyy[m] += syy;
+        sigzz[m] += szz;
+        sigxy[m] += sxy;
+        sigxz[m] += sxz;
+        sigyz[m] += syz;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   accumulate pair stress by z-layer, friction with shear history
+   for every overlapping neighbor pair with at least one atom in the
+     fix group, compute the normal + tangential force and add the
+     (separation * force) products to the sig arrays of the layers
+     that hold atom i and atom j
+   stored shear values are read but never written
+------------------------------------------------------------------------- */
+
+void FixGranDiag::stress_history()
+{
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+  const int newton_pair = force->newton_pair;
+
+  // visit each owned atom i and every entry j of its neighbor list
+
+  for (int i = 0; i < nlocal; i++) {
+    const double xi = x[i][0];
+    const double yi = x[i][1];
+    const double zi = x[i][2];
+    const double radi = radius[i];
+    const int *partners = neighbor->firstneigh[i];
+    const double *firstshear = neighbor->firstshear[i];
+    const int npartners = neighbor->numneigh[i];
+
+    for (int k = 0; k < npartners; k++) {
+      const int j = partners[k];
+
+      // at least one atom of the pair must belong to the fix group
+
+      if (!((mask[i] & groupbit) || (mask[j] & groupbit))) continue;
+
+      const double delx = xi - x[j][0];
+      const double dely = yi - x[j][1];
+      const double delz = zi - x[j][2];
+      const double rsq = delx*delx + dely*dely + delz*delz;
+      const double radj = radius[j];
+      const double radsum = radi + radj;
+
+      // no contact unless the separation is below the sum of the radii
+
+      if (!(rsq < radsum*radsum)) continue;
+
+      const double r = sqrt(rsq);
+
+      // relative translational velocity, scaled by dt
+
+      const double vr1 = (v[i][0] - v[j][0]) * dt;
+      const double vr2 = (v[i][1] - v[j][1]) * dt;
+      const double vr3 = (v[i][2] - v[j][2]) * dt;
+
+      // normal component = projection onto the separation vector
+
+      const double vnnr = vr1*delx + vr2*dely + vr3*delz;
+      const double vn1 = delx*vnnr / rsq;
+      const double vn2 = dely*vnnr / rsq;
+      const double vn3 = delz*vnnr / rsq;
+
+      // tangential component = what is left over
+
+      const double vt1 = vr1 - vn1;
+      const double vt2 = vr2 - vn2;
+      const double vt3 = vr3 - vn3;
+
+      // relative rotational velocity, scaled by dt/r
+
+      const double dt_over_r = dt/r;
+      const double wr1 = (radi*phiv[i][0] + radj*phiv[j][0]) * dt_over_r;
+      const double wr2 = (radi*phiv[i][1] + radj*phiv[j][1]) * dt_over_r;
+      const double wr3 = (radi*phiv[i][2] + radj*phiv[j][2]) * dt_over_r;
+
+      // effective mass of the pair
+      // an atom in the freeze group leaves its partner's mass as xmeff
+
+      double xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+      if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+      if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+
+      // normal force coefficient: repulsion minus damping
+      // this definition of damp includes the extra 1/r term
+
+      const double damp = xmeff*gamman_dl*vnnr/rsq;
+      const double ccel = xkk*(radsum-r)/r - damp;
+
+      // tangential relative velocity including the rotational part
+      // its magnitude is not needed here, so it is not computed
+
+      const double vtr1 = vt1 - (delz*wr2-dely*wr3);
+      const double vtr2 = vt2 - (delx*wr3-delz*wr1);
+      const double vtr3 = vt3 - (dely*wr1-delx*wr2);
+
+      // shear history effects
+      // shrmag = magnitude of shear, taken before the projection below
+      // do not update shear history since not timestepping
+
+      const double *shear = &firstshear[3*k];
+      double shrx = shear[0] + vtr1;
+      double shry = shear[1] + vtr2;
+      double shrz = shear[2] + vtr3;
+      const double shrmag = sqrt(shrx*shrx + shry*shry + shrz*shrz);
+
+      // rotate shear displacements correctly:
+      // remove the component along the separation vector
+
+      const double rsht = (shrx*delx + shry*dely + shrz*delz) / rsq;
+      shrx -= rsht*delx;
+      shry -= rsht*dely;
+      shrz -= rsht*delz;
+
+      // tangential forces
+
+      double fs1 = - (xkkt*shrx + xmeff*gammas_dl*vtr1);
+      double fs2 = - (xkkt*shry + xmeff*gammas_dl*vtr2);
+      double fs3 = - (xkkt*shrz + xmeff*gammas_dl*vtr3);
+
+      // force normalization
+      // cap the tangential force magnitude at fn = xmu * |normal force|
+
+      const double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+      const double fn = xmu * fabs(ccel*r);
+
+      if (fs > fn) {
+        if (shrmag != 0.0) {
+          const double scale = fn/fs;
+          fs1 *= scale;
+          fs2 *= scale;
+          fs3 *= scale;
+        } else {
+          fs1 = 0.0;
+          fs2 = 0.0;
+          fs3 = 0.0;
+        }
+      }
+
+      // total pair force
+
+      const double ccelx = delx*ccel + fs1;
+      const double ccely = dely*ccel + fs2;
+      const double ccelz = delz*ccel + fs3;
+
+      // stress contribution of the pair
+
+      const double sxx = delx*ccelx;
+      const double syy = dely*ccely;
+      const double szz = delz*ccelz;
+      const double sxy = delx*ccely;
+      const double sxz = delx*ccelz;
+      const double syz = dely*ccelz;
+
+      // atom i always contributes to its z-layer
+      // atom j contributes if newton_pair is on or if owned by this proc
+
+      const double zatom[2] = {x[i][2], x[j][2]};
+      const int ncontrib = (newton_pair || j < nlocal) ? 2 : 1;
+
+      for (int c = 0; c < ncontrib; c++) {
+        const int m = static_cast<int> ((zatom[c]-boxzlo) * stepinv);
+        if (m < 0 || m >= nlayers) continue;
+        sigxx[m] += sxx;
+        sigyy[m] += syy;
+        sigzz[m] += szz;
+        sigxy[m] += sxy;
+        sigxz[m] += sxz;
+        sigyz[m] += syz;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   accumulate pair stress by z-layer, Hertzian contacts with shear history
+   same procedure as the shear-history variant, except that the normal
+     and tangential forces are both multiplied by sqrt(radsum - r)
+   stored shear values are read but never written
+------------------------------------------------------------------------- */
+
+void FixGranDiag::stress_hertzian()
+{
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+  const int newton_pair = force->newton_pair;
+
+  // visit each owned atom i and every entry j of its neighbor list
+
+  for (int i = 0; i < nlocal; i++) {
+    const double xi = x[i][0];
+    const double yi = x[i][1];
+    const double zi = x[i][2];
+    const double radi = radius[i];
+    const int *partners = neighbor->firstneigh[i];
+    const double *firstshear = neighbor->firstshear[i];
+    const int npartners = neighbor->numneigh[i];
+
+    for (int k = 0; k < npartners; k++) {
+      const int j = partners[k];
+
+      // at least one atom of the pair must belong to the fix group
+
+      if (!((mask[i] & groupbit) || (mask[j] & groupbit))) continue;
+
+      const double delx = xi - x[j][0];
+      const double dely = yi - x[j][1];
+      const double delz = zi - x[j][2];
+      const double rsq = delx*delx + dely*dely + delz*delz;
+      const double radj = radius[j];
+      const double radsum = radi + radj;
+
+      // no contact unless the separation is below the sum of the radii
+
+      if (!(rsq < radsum*radsum)) continue;
+
+      const double r = sqrt(rsq);
+
+      // relative translational velocity, scaled by dt
+
+      const double vr1 = (v[i][0] - v[j][0]) * dt;
+      const double vr2 = (v[i][1] - v[j][1]) * dt;
+      const double vr3 = (v[i][2] - v[j][2]) * dt;
+
+      // normal component = projection onto the separation vector
+
+      const double vnnr = vr1*delx + vr2*dely + vr3*delz;
+      const double vn1 = delx*vnnr / rsq;
+      const double vn2 = dely*vnnr / rsq;
+      const double vn3 = delz*vnnr / rsq;
+
+      // tangential component = what is left over
+
+      const double vt1 = vr1 - vn1;
+      const double vt2 = vr2 - vn2;
+      const double vt3 = vr3 - vn3;
+
+      // relative rotational velocity, scaled by dt/r
+
+      const double dt_over_r = dt/r;
+      const double wr1 = (radi*phiv[i][0] + radj*phiv[j][0]) * dt_over_r;
+      const double wr2 = (radi*phiv[i][1] + radj*phiv[j][1]) * dt_over_r;
+      const double wr3 = (radi*phiv[i][2] + radj*phiv[j][2]) * dt_over_r;
+
+      // effective mass of the pair
+      // an atom in the freeze group leaves its partner's mass as xmeff
+
+      double xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+      if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+      if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+
+      // normal force coefficient: repulsion minus damping,
+      //   then scaled by the Hertzian factor sqrt(overlap)
+      // this definition of damp includes the extra 1/r term
+
+      const double damp = xmeff*gamman_dl*vnnr/rsq;
+      const double rhertz = sqrt(radsum - r);
+      const double ccel = rhertz * (xkk*(radsum-r)/r - damp);
+
+      // tangential relative velocity including the rotational part
+      // its magnitude is not needed here, so it is not computed
+
+      const double vtr1 = vt1 - (delz*wr2-dely*wr3);
+      const double vtr2 = vt2 - (delx*wr3-delz*wr1);
+      const double vtr3 = vt3 - (dely*wr1-delx*wr2);
+
+      // shear history effects
+      // shrmag = magnitude of shear, taken before the projection below
+      // do not update shear history since not timestepping
+
+      const double *shear = &firstshear[3*k];
+      double shrx = shear[0] + vtr1;
+      double shry = shear[1] + vtr2;
+      double shrz = shear[2] + vtr3;
+      const double shrmag = sqrt(shrx*shrx + shry*shry + shrz*shrz);
+
+      // rotate shear displacements correctly:
+      // remove the component along the separation vector
+
+      const double rsht = (shrx*delx + shry*dely + shrz*delz) / rsq;
+      shrx -= rsht*delx;
+      shry -= rsht*dely;
+      shrz -= rsht*delz;
+
+      // tangential forces, also scaled by the Hertzian factor
+
+      double fs1 = -rhertz * (xkkt*shrx + xmeff*gammas_dl*vtr1);
+      double fs2 = -rhertz * (xkkt*shry + xmeff*gammas_dl*vtr2);
+      double fs3 = -rhertz * (xkkt*shrz + xmeff*gammas_dl*vtr3);
+
+      // force normalization
+      // cap the tangential force magnitude at fn = xmu * |normal force|
+
+      const double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+      const double fn = xmu * fabs(ccel*r);
+
+      if (fs > fn) {
+        if (shrmag != 0.0) {
+          const double scale = fn/fs;
+          fs1 *= scale;
+          fs2 *= scale;
+          fs3 *= scale;
+        } else {
+          fs1 = 0.0;
+          fs2 = 0.0;
+          fs3 = 0.0;
+        }
+      }
+
+      // total pair force
+
+      const double ccelx = delx*ccel + fs1;
+      const double ccely = dely*ccel + fs2;
+      const double ccelz = delz*ccel + fs3;
+
+      // stress contribution of the pair
+
+      const double sxx = delx*ccelx;
+      const double syy = dely*ccely;
+      const double szz = delz*ccelz;
+      const double sxy = delx*ccely;
+      const double sxz = delx*ccelz;
+      const double syz = dely*ccelz;
+
+      // atom i always contributes to its z-layer
+      // atom j contributes if newton_pair is on or if owned by this proc
+
+      const double zatom[2] = {x[i][2], x[j][2]};
+      const int ncontrib = (newton_pair || j < nlocal) ? 2 : 1;
+
+      for (int c = 0; c < ncontrib; c++) {
+        const int m = static_cast<int> ((zatom[c]-boxzlo) * stepinv);
+        if (m < 0 || m >= nlayers) continue;
+        sigxx[m] += sxx;
+        sigyy[m] += syy;
+        sigzz[m] += szz;
+        sigxy[m] += sxy;
+        sigxz[m] += sxz;
+        sigyz[m] += syz;
+      }
+    }
+  }
+}
--- a/src/GRANULAR/fix_gran_diag.h
+++ b/src/GRANULAR/fix_gran_diag.h
@ -0,0 +1,53 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FIX_GRAN_DIAG_H
+#define FIX_GRAN_DIAG_H
+
+#include "stdio.h"
+#include "fix.h"
+
+/* ----------------------------------------------------------------------
+   fix that writes density, velocity and stress profiles binned by z-layer
+------------------------------------------------------------------------- */
+
+class FixGranDiag : public Fix {
+ public:
+  FixGranDiag(int, char **);
+  ~FixGranDiag();
+  int setmask();
+  void init();
+  void setup();
+  void end_of_step();
+
+ private:
+  int me;                    // only the proc with me == 0 writes the files
+  int first;
+  int pairstyle;             // selects which stress_*() routine is used
+  int nlayers;               // valid layer indices are 0 to nlayers-1
+  int maxlayers;             // length of every per-layer array
+
+  FILE *fpden;               // density profile output
+  FILE *fpvel;               // velocity profile output
+  FILE *fpstr;               // stress profile output
+
+  double stepz;              // layer thickness in z
+  double stepinv;            // multiplies (z - boxzlo) to give a layer index
+  double PI;
+  double boxzlo;             // z origin of the layers
+
+  // parameters of the contact force evaluated in the stress_*() routines
+
+  double dt;
+  double xkk;
+  double xkkt;
+  double xmu;
+  double gamman_dl;
+  double gammas_dl;
+  int freeze_group_bit;
+
+  // per-layer count and density
+
+  int *numdens;
+  double *dendens;
+
+  // per-layer velocity sums: first and second moments
+
+  double *velx;
+  double *vely;
+  double *velz;
+  double *velxx;
+  double *velyy;
+  double *velzz;
+  double *velxy;
+  double *velxz;
+  double *velyz;
+
+  // per-layer pair stress sums
+
+  double *sigxx;
+  double *sigyy;
+  double *sigzz;
+  double *sigxy;
+  double *sigxz;
+  double *sigyz;
+
+  // per-layer velocity averages
+
+  double *velx11;
+  double *vely11;
+  double *velz11;
+  double *velxx11;
+  double *velyy11;
+  double *velzz11;
+  double *velxy11;
+  double *velxz11;
+  double *velyz11;
+
+  // per-layer velocity fluctuation term
+
+  double *velfxx;
+  double *velfyy;
+  double *velfzz;
+  double *velfxy;
+  double *velfxz;
+  double *velfyz;
+
+  // per-layer total stress written to the stress file
+
+  double *sigx2;
+  double *sigy2;
+  double *sigz2;
+  double *sigxy2;
+  double *sigxz2;
+  double *sigyz2;
+
+  void allocate();
+  void deallocate();
+  void stress_no_history();
+  void stress_history();
+  void stress_hertzian();
+};
+
+#endif
--- a/src/GRANULAR/fix_insert.cpp
+++ b/src/GRANULAR/fix_insert.cpp
@ -0,0 +1,534 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "string.h"
+#include "fix_insert.h"
+#include "atom.h"
+#include "force.h"
+#include "update.h"
+#include "modify.h"
+#include "fix_gravity.h"
+#include "fix_shear_history.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "region.h"
+#include "region_block.h"
+#include "region_cylinder.h"
+#include "random_park.h"
+#include "memory.h"
+#include "error.h"
+
+#define EPSILON 0.001
+
+/* ----------------------------------------------------------------------
+   parse the fix insert arguments and set up the insertion schedule
+   arg[3] = total # of particles to insert
+   arg[4] = atom type of inserted particles
+   arg[5] = seed for the random number generator
+   optional keywords: region, diam, dens, vol, rate, vel
+   errors out if no region is given, the region is unsuitable, or the
+     requested volume fraction yields no particles per insertion step
+------------------------------------------------------------------------- */
+
+FixInsert::FixInsert(int narg, char **arg) : Fix(narg, arg)
+{
+  if (narg < 6) error->all("Illegal fix insert command");
+
+  if (atom->check_style("granular") == 0)
+    error->all("Must use fix insert with atom style granular");
+
+  // required args
+
+  ninsert = atoi(arg[3]);
+  ntype = atoi(arg[4]);
+  seed = atoi(arg[5]);
+
+  PI = 4.0*atan(1.0);
+
+  // option defaults
+
+  int iregion = -1;
+  radius_lo = radius_hi = 0.5;
+  density_lo = density_hi = 1.0;
+  volfrac = 0.25;
+  maxattempt = 50;
+  rate = 0.0;
+  vxlo = vxhi = vylo = vyhi = vy = vz = 0.0;
+
+  // optional args
+  // each keyword consumes itself plus a fixed number of values
+
+  int iarg = 6;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"region") == 0) {
+      if (iarg+2 > narg) error->all("Illegal fix insert command");
+      for (iregion = 0; iregion < domain->nregion; iregion++)
+        if (strcmp(arg[iarg+1],domain->regions[iregion]->id) == 0) break;
+      if (iregion == domain->nregion)
+        error->all("Fix insert region ID does not exist");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"diam") == 0) {
+      // diameters are given, radii are stored
+      if (iarg+3 > narg) error->all("Illegal fix insert command");
+      radius_lo = 0.5 * atof(arg[iarg+1]);
+      radius_hi = 0.5 * atof(arg[iarg+2]);
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"dens") == 0) {
+      if (iarg+3 > narg) error->all("Illegal fix insert command");
+      density_lo = atof(arg[iarg+1]);
+      density_hi = atof(arg[iarg+2]);
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"vol") == 0) {
+      if (iarg+3 > narg) error->all("Illegal fix insert command");
+      volfrac = atof(arg[iarg+1]);
+      maxattempt = atoi(arg[iarg+2]);
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"rate") == 0) {
+      if (iarg+2 > narg) error->all("Illegal fix insert command");
+      rate = atof(arg[iarg+1]);
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"vel") == 0) {
+      // 3d takes vxlo vxhi vylo vyhi vz, 2d takes vxlo vxhi vy
+      if (force->dimension == 3) {
+        if (iarg+6 > narg) error->all("Illegal fix insert command");
+        vxlo = atof(arg[iarg+1]);
+        vxhi = atof(arg[iarg+2]);
+        vylo = atof(arg[iarg+3]);
+        vyhi = atof(arg[iarg+4]);
+        vz = atof(arg[iarg+5]);
+        iarg += 6;
+      } else {
+        if (iarg+4 > narg) error->all("Illegal fix insert command");
+        vxlo = atof(arg[iarg+1]);
+        vxhi = atof(arg[iarg+2]);
+        vy = atof(arg[iarg+3]);
+        vz = 0.0;
+        iarg += 4;
+      }
+    } else error->all("Illegal fix insert command");
+  }
+
+  // error check that a valid region was specified
+
+  if (iregion == -1) error->all("Must specify a region in fix insert");
+
+  // error checks on region
+  // only interior block and z-axis cylinder regions are accepted,
+  // and the region must lie inside the simulation box
+
+  if (domain->regions[iregion]->interior == 0)
+    error->all("Must use region with side = in with fix insert");
+
+  if (strcmp(domain->regions[iregion]->style,"block") == 0) {
+    region_style = 1;
+    RegBlock *block = (RegBlock *) domain->regions[iregion];
+    xlo = block->xlo;
+    xhi = block->xhi;
+    ylo = block->ylo;
+    yhi = block->yhi;
+    zlo = block->zlo;
+    zhi = block->zhi;
+    if (xlo < domain->boxxlo || xhi > domain->boxxhi ||
+        ylo < domain->boxylo || yhi > domain->boxyhi ||
+        zlo < domain->boxzlo || zhi > domain->boxzhi)
+      error->all("Insertion region extends outside simulation box");
+  } else if (strcmp(domain->regions[iregion]->style,"cylinder") == 0) {
+    region_style = 2;
+    RegCylinder *cylinder = (RegCylinder *) domain->regions[iregion];
+    char axis = cylinder->axis;
+    xc = cylinder->c1;
+    yc = cylinder->c2;
+    rc = cylinder->radius;
+    zlo = cylinder->lo;
+    zhi = cylinder->hi;
+    if (axis != 'z')
+      error->all("Must use a z-axis cylinder with fix insert");
+    if (xc-rc < domain->boxxlo || xc+rc > domain->boxxhi ||
+        yc-rc < domain->boxylo || yc+rc > domain->boxyhi ||
+        zlo < domain->boxzlo || zhi > domain->boxzhi)
+      error->all("Insertion region extends outside simulation box");
+  } else error->all("Must use a block or cylinder region with fix insert");
+
+  if (region_style == 2 && force->dimension == 2)
+    error->all("Must use a block region with fix insert for 2d simulations");
+
+  // random number generator, same for all procs
+
+  random = new RanPark(seed);
+
+  // allgather arrays
+
+  MPI_Comm_rank(world,&me);
+  MPI_Comm_size(world,&nprocs);
+  recvcounts = new int[nprocs];
+  displs = new int[nprocs];
+
+  // nfreq = timesteps between insertions
+  // should be time for a particle to fall from top of insertion region
+  //   to bottom, taking into account that the region may be moving
+  // the whole numerator is divided by g (g is hard-wired to 1.0 here)
+  // 1st insertion on next timestep
+
+  double v_relative,delta;
+  double g = 1.0;
+  if (force->dimension == 3) {
+    v_relative = vz - rate;
+    delta = (v_relative + sqrt(v_relative*v_relative + 2.0*g*(zhi-zlo))) / g;
+  } else {
+    v_relative = vy - rate;
+    delta = (v_relative + sqrt(v_relative*v_relative + 2.0*g*(yhi-ylo))) / g;
+  }
+  nfreq = static_cast<int> (delta/update->dt + 0.5);
+
+  force_reneighbor = 1;
+  next_reneighbor = update->ntimestep + 1;
+  nfirst = next_reneighbor;
+  ninserted = 0;
+
+  // nper = # to insert each time
+  // depends on specified volume fraction
+  // volume = volume of insertion region
+  // volume_one = volume of inserted particle (with max possible radius)
+  // in 3d, insure dy >= 1, for quasi-2d simulations
+
+  double volume,volume_one;
+  if (force->dimension == 3) {
+    if (region_style == 1) {
+      double dy = yhi - ylo;
+      if (dy < 1.0) dy = 1.0;
+      volume = (xhi-xlo) * dy * (zhi-zlo);
+    } else volume = PI*rc*rc * (zhi-zlo);
+    volume_one = 4.0/3.0 * PI * radius_hi*radius_hi*radius_hi;
+  } else {
+    volume = (xhi-xlo) * (yhi-ylo);
+    volume_one = PI * radius_hi*radius_hi;
+  }
+
+  // the cast truncates, so a small region or volume fraction can give 0
+  // stop here rather than divide by zero in the nfinal estimate below
+
+  nper = static_cast<int> (volfrac*volume/volume_one);
+  if (nper <= 0)
+    error->all("Fix insert insertion count per timestep must be > 0");
+
+  // nfinal = timestep of the last insertion
+
+  int nfinal = update->ntimestep + 1 + (ninsert-1)/nper * nfreq;
+
+  // print stats
+
+  if (me == 0) {
+    if (screen)
+      fprintf(screen,
+              "Particle insertion: %d every %d steps, %d by step %d\n",
+              nper,nfreq,ninsert,nfinal);
+    if (logfile)
+      fprintf(logfile,
+              "Particle insertion: %d every %d steps, %d by step %d\n",
+              nper,nfreq,ninsert,nfinal);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   free the allgather work arrays and the random number generator
+------------------------------------------------------------------------- */
+
+FixInsert::~FixInsert()
+{
+  delete [] displs;
+  delete [] recvcounts;
+  delete random;
+}
+
+/* ----------------------------------------------------------------------
+   this fix only asks to be invoked at the PRE_EXCHANGE stage
+------------------------------------------------------------------------- */
+
+int FixInsert::setmask()
+{
+  return 0 | PRE_EXCHANGE;
+}
+
+/* ----------------------------------------------------------------------
+   check that a suitably oriented gravity fix exists
+   record the index of the shear history fix, if one is needed
+------------------------------------------------------------------------- */
+
+void FixInsert::init()
+{
+  // a gravity fix is required, else insertion cannot work
+
+  int igrav = 0;
+  while (igrav < modify->nfix &&
+         strcmp(modify->fix[igrav]->style,"gravity") != 0) igrav++;
+  if (igrav == modify->nfix)
+    error->all("Must use fix gravity with fix insert");
+
+  // unit vector of gravity from its two angles, given in degrees
+
+  FixGravity *gravity = (FixGravity *) modify->fix[igrav];
+  const double phi = gravity->phi;
+  const double theta = gravity->theta;
+  const double pi = 2.0 * asin(1.0);
+  const double degree2rad = 2.0*pi / 360.0;
+  const double xgrav = sin(degree2rad * theta) * cos(degree2rad * phi);
+  const double ygrav = sin(degree2rad * theta) * sin(degree2rad * phi);
+  const double zgrav = cos(degree2rad * theta);
+
+  // gravity must point in -z for 3d and in -y for 2d, to within EPSILON
+
+  if (force->dimension == 3) {
+    const bool points_down_z = !(fabs(xgrav) > EPSILON) &&
+      !(fabs(ygrav) > EPSILON) && !(fabs(zgrav+1.0) > EPSILON);
+    if (!points_down_z)
+      error->all("Gravity must point in -z to use with fix insert in 3d");
+  } else {
+    const bool points_down_y = !(fabs(xgrav) > EPSILON) &&
+      !(fabs(ygrav+1.0) > EPSILON) && !(fabs(zgrav) > EPSILON);
+    if (!points_down_y)
+      error->all("Gravity must point in -y to use with fix insert in 2d");
+  }
+
+  // look for a shear history fix, only if the pair style matches
+  //   gran/history or gran/hertzian
+  // the last matching fix wins, -1 means none
+
+  ifix_history = -1;
+  if (force->pair_match("gran/history") ||
+      force->pair_match("gran/hertzian")) {
+    for (int n = 0; n < modify->nfix; n++) {
+      if (strcmp(modify->fix[n]->style,"SHEAR_HISTORY") != 0) continue;
+      ifix_history = n;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   perform particle insertion
+------------------------------------------------------------------------- */
+
+void FixInsert::pre_exchange()
+{
+  int i;
+
+  // just return if should not be called on this timestep
+
+  if (next_reneighbor != update->ntimestep) return;
+
+  // nnew = # to insert this timestep
+
+  int nnew = nper;
+  if (ninserted + nnew > ninsert) nnew = ninsert - ninserted;
+
+  // lo/hi current = z bounds of insertion region this timestep
+
+  if (force->dimension == 3) {
+    lo_current = zlo + (update->ntimestep - nfirst) * update->dt * rate;
+    hi_current = zhi + (update->ntimestep - nfirst) * update->dt * rate;
+  } else {
+    lo_current = ylo + (update->ntimestep - nfirst) * update->dt * rate;
+    hi_current = yhi + (update->ntimestep - nfirst) * update->dt * rate;
+  }
+
+  // ncount = # of my atoms that overlap the insertion region
+  // nprevious = total of ncount across all procs
+  
+  int ncount = 0;
+  for (i = 0; i < atom->nlocal; i++)
+    if (overlap(i)) ncount++;
+
+  int nprevious;
+  MPI_Allreduce(&ncount,&nprevious,1,MPI_INT,MPI_SUM,world);
+
+  // xmine is for my atoms
+  // xnear is for atoms from all procs + atoms to be inserted
+
+  double **xmine = 
+    memory->create_2d_double_array(ncount,4,"fix_insert:xmine");
+  double **xnear = 
+    memory->create_2d_double_array(nprevious+nnew,4,"fix_insert:xnear");
+  int nnear = nprevious;
+
+  // setup for allgatherv
+
+  int n = 4*ncount;
+  MPI_Allgather(&n,1,MPI_INT,recvcounts,1,MPI_INT,world);
+
+  displs[0] = 0;
+  for (int iproc = 1; iproc < nprocs; iproc++)
+    displs[iproc] = displs[iproc-1] + recvcounts[iproc-1];
+
+  // load up xmine array
+  
+  double **x = atom->x;
+  double *radius = atom->radius;
+
+  ncount = 0;
+  for (i = 0; i < atom->nlocal; i++)
+    if (overlap(i)) {
+      xmine[ncount][0] = x[i][0];
+      xmine[ncount][1] = x[i][1];
+      xmine[ncount][2] = x[i][2];
+      xmine[ncount][3] = radius[i];
+      ncount++;
+    }
+
+  // perform allgatherv to acquire list of nearby particles on all procs
+
+  double *ptr = NULL;
+  if (ncount) ptr = xmine[0];
+  MPI_Allgatherv(ptr,4*ncount,MPI_DOUBLE,
+		 xnear[0],recvcounts,displs,MPI_DOUBLE,world);
+
+  // insert new atoms into xnear list, one by one
+  // check against all nearby atoms and previously inserted ones
+  // if there is an overlap then try again at same z (3d) or y (2d) coord
+  // else insert by adding to xnear list
+  // max = maximum # of insertion attempts for all particles
+  // h = height, biased to give uniform distribution in time of insertion
+
+  int success;
+  double xtmp,ytmp,ztmp,radtmp,delx,dely,delz,rsq,radsum,rn,h;
+
+  int attempt = 0;
+  int max = nnew * maxattempt;
+  int ntotal = nprevious+nnew;
+
+  while (nnear < ntotal) {
+    rn = random->uniform();
+    h = hi_current - rn*rn * (hi_current-lo_current);
+    radtmp = radius_lo + random->uniform() * (radius_hi-radius_lo);
+    success = 0;
+    while (attempt < max) {
+      attempt++;
+      xyz_random(h,xtmp,ytmp,ztmp);
+      for (i = 0; i < nnear; i++) {
+	delx = xtmp - xnear[i][0];
+	dely = ytmp - xnear[i][1];
+	delz = ztmp - xnear[i][2];
+	rsq = delx*delx + dely*dely + delz*delz;
+	radsum = radtmp + xnear[i][3];
+	if (rsq <= radsum*radsum) break;
+      }
+      if (i == nnear) {
+	success = 1;
+	break;
+      }
+    }
+    if (success) {
+      xnear[nnear][0] = xtmp;
+      xnear[nnear][1] = ytmp;
+      xnear[nnear][2] = ztmp;
+      xnear[nnear][3] = radtmp;
+      nnear++;
+    } else break;
+  }
+
+  // warn if not all insertions were performed
+
+  ninserted += nnear-nprevious;
+  if (nnear - nprevious < nnew && me == 0)
+    error->warning("Less insertions than requested");
+
+  // add new atoms in my sub-box to my arrays
+  // initialize info about the atoms
+  // type, diameter, density set from fix parameters
+  // group mask set to "all" plus fix group
+  // z velocity set to what velocity would be if particle
+  //   had fallen from top of insertion region
+  // this gives continuous stream of atoms
+  // set npartner for new atoms to 0 (assume not touching any others)
+
+  int m;
+  double denstmp,vxtmp,vytmp,vztmp;
+  double g = 1.0;
+
+  for (i = nprevious; i < nnear; i++) {
+    xtmp = xnear[i][0];
+    ytmp = xnear[i][1];
+    ztmp = xnear[i][2];
+    radtmp = xnear[i][3];
+    denstmp = density_lo + random->uniform() * (density_hi-density_lo);
+    if (force->dimension == 3) {
+      vxtmp = vxlo + random->uniform() * (vxhi-vxlo);
+      vytmp = vylo + random->uniform() * (vyhi-vylo);
+      vztmp = vz - sqrt(2.0*g*(hi_current-ztmp));
+    } else {
+      vxtmp = vxlo + random->uniform() * (vxhi-vxlo);
+      vytmp = vy - sqrt(2.0*g*(hi_current-ytmp));
+      vztmp = 0.0;
+    }
+
+    if (xtmp >= domain->subxlo && xtmp < domain->subxhi &&
+	ytmp >= domain->subylo && ytmp < domain->subyhi &&
+	ztmp >= domain->subzlo && ztmp < domain->subzhi) {
+      atom->create_one(ntype,xtmp,ytmp,ztmp);
+      m = atom->nlocal - 1;
+      atom->type[m] = ntype;
+      atom->radius[m] = radtmp;
+      atom->density[m] = denstmp;
+      if (force->dimension == 3) 
+	atom->rmass[m] = 4.0*PI/3.0 * radtmp*radtmp*radtmp * denstmp;
+      else
+	atom->rmass[m] = PI * radtmp*radtmp * denstmp;
+      atom->mask[m] = 1 | groupbit;
+      atom->v[m][0] = vxtmp;
+      atom->v[m][1] = vytmp;
+      atom->v[m][2] = vztmp;
+      if (ifix_history >= 0)
+	((FixShearHistory *) modify->fix[ifix_history])->npartner[m] = 0;
+    }
+  }
+
+  // tag # of new particles grow beyond all previous atoms
+  // reset global natoms
+  // if global map exists, reset it
+
+  atom->tag_extend();
+  atom->natoms += nnear - nprevious;
+  if (atom->map_style) {
+    atom->map_init();
+    atom->map_set();
+  }
+
+  // free local memory
+
+  memory->destroy_2d_double_array(xmine);
+  memory->destroy_2d_double_array(xnear);
+
+  // next timestep to insert
+
+  if (ninserted < ninsert) next_reneighbor += nfreq;
+  else next_reneighbor = 0;
+}
+
+/* ----------------------------------------------------------------------
+   check if particle i could overlap with a particle inserted into region
+   return 1 if yes, 0 if no
+   use maximum diameter for inserted particle
+------------------------------------------------------------------------- */
+
+int FixInsert::overlap(int i)
+{
+  // pad every bound by the largest insertable radius plus particle i's radius
+
+  const double pad = radius_hi + atom->radius[i];
+  const double *xi = atom->x[i];
+
+  // 2d: bounded in x by xlo/xhi and in y by the current insertion band
+
+  if (force->dimension != 3) {
+    if (xi[0] < xlo-pad || xi[0] > xhi+pad) return 0;
+    if (xi[1] < lo_current-pad || xi[1] > hi_current+pad) return 0;
+    return 1;
+  }
+
+  // 3d: every region style is bounded in z by the current insertion band
+
+  if (xi[2] < lo_current-pad || xi[2] > hi_current+pad) return 0;
+
+  // region_style 1: rectangular extent in x and y
+
+  if (region_style == 1) {
+    if (xi[0] < xlo-pad || xi[0] > xhi+pad) return 0;
+    if (xi[1] < ylo-pad || xi[1] > yhi+pad) return 0;
+    return 1;
+  }
+
+  // any other region_style: circle of radius rc about (xc,yc) in the xy plane
+
+  const double offx = xi[0] - xc;
+  const double offy = xi[1] - yc;
+  const double reach = rc + pad;
+  if (offx*offx + offy*offy > reach*reach) return 0;
+  return 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixInsert::xyz_random(double h, double &x, double &y, double &z)
+{
+  // 2d: random x between xlo and xhi, height h goes into y, z is fixed at 0
+
+  if (force->dimension != 3) {
+    x = xlo + random->uniform() * (xhi-xlo);
+    y = h;
+    z = 0.0;
+    return;
+  }
+
+  // 3d, region_style 1: random x and y inside the rectangle, height h in z
+
+  if (region_style == 1) {
+    x = xlo + random->uniform() * (xhi-xlo);
+    y = ylo + random->uniform() * (yhi-ylo);
+    z = h;
+    return;
+  }
+
+  // 3d, any other region_style: draw pairs in [-0.5,0.5) x [-0.5,0.5)
+  // until one falls inside the circle of radius 0.5, then scale by 2*rc
+
+  double u,w;
+  do {
+    u = random->uniform() - 0.5;
+    w = random->uniform() - 0.5;
+  } while (!(u*u + w*w < 0.25));
+
+  x = xc + 2.0*u*rc;
+  y = yc + 2.0*w*rc;
+  z = h;
+}
--- a/src/GRANULAR/fix_insert.h
+++ b/src/GRANULAR/fix_insert.h
@ -0,0 +1,58 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FIX_INSERT_H
+#define FIX_INSERT_H
+
+#include "fix.h"
+
+class RanPark;
+
+class FixInsert : public Fix {
+  friend class PairGranHistory;
+  friend class PairGranHertzian;
+  friend class PairGranNoHistory;
+
+ public:
+  FixInsert(int, char **);
+  ~FixInsert();
+  int setmask();
+  void init();
+  void pre_exchange();
+
+ private:
+  int ninsert;              // insertion stops once ninserted reaches this
+  int ntype;                // atom type assigned to each inserted particle
+  int seed;                 // presumably the RNG seed -- set outside this view
+  double radius_lo;         // inserted radius is drawn uniformly from
+  double radius_hi;         //   [radius_lo,radius_hi)
+  double density_lo;        // inserted density is drawn uniformly from
+  double density_hi;        //   [density_lo,density_hi)
+  double volfrac;
+  int maxattempt;           // attempt budget per step = nnew * maxattempt
+  int region_style;         // 1 = x/y rectangle, otherwise circle (xc,yc,rc)
+  double rate;
+  double vxlo;              // range of the random x velocity
+  double vxhi;
+  double vylo;              // range of the random y velocity (3d only)
+  double vyhi;
+  double vy;                // velocity offset along y used in 2d
+  double vz;                // velocity offset along z used in 3d
+  double xlo;               // rectangle bounds of the insertion region
+  double xhi;
+  double ylo;
+  double yhi;
+  double zlo;
+  double zhi;
+  double xc;                // circle center and radius of the insertion region
+  double yc;
+  double rc;
+
+  int me;                   // proc 0 issues the insertion warning
+  int nprocs;
+  int *recvcounts;          // per-proc counts for MPI_Allgatherv
+  int *displs;              // per-proc displacements for MPI_Allgatherv
+  double PI;
+  int nfreq;                // added to next_reneighbor after each insertion step
+  int nfirst;
+  int ninserted;            // running total of particles inserted so far
+  int nper;
+  double lo_current;        // lower bound of the current insertion height band
+  double hi_current;        // upper bound of the current insertion height band
+  int ifix_history;         // index of FixShearHistory in modify->fix, used if >= 0
+
+  RanPark *random;
+
+  int overlap(int);
+  void xyz_random(double, double &, double &, double &);
+};
+
+#endif
--- a/src/GRANULAR/fix_nve_gran.cpp
+++ b/src/GRANULAR/fix_nve_gran.cpp
@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdio.h"
+#include "string.h"
+#include "fix_nve_gran.h"
+#include "atom.h"
+#include "update.h"
+#include "force.h"
+#include "error.h"
+
+// moments of inertia for sphere and disk
+
+#define INERTIA3D 0.4
+#define INERTIA2D 0.5
+
+/* ---------------------------------------------------------------------- */
+
+FixNVEGran::FixNVEGran(int narg, char **arg) : Fix(narg, arg)
+{
+  // the command must have exactly 3 arguments
+
+  const int nrequired = 3;
+  if (narg != nrequired) error->all("Illegal fix nve/gran command");
+
+  // reject the fix when the atom style check for "granular" fails
+
+  if (!atom->check_style("granular"))
+    error->all("Must use fix nve/gran with atom style granular");
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNVEGran::setmask()
+{
+  // this fix is invoked in both halves of the integration step
+
+  return INITIAL_INTEGRATE | FINAL_INTEGRATE;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVEGran::init()
+{
+  // rotational factor is the translational one divided by the
+  // moment-of-inertia coefficient for a sphere (3d) or a disk (2d)
+
+  const double inertia = (force->dimension == 3) ? INERTIA3D : INERTIA2D;
+
+  dtv = update->dt;
+  dtf = 0.5 * update->dt * force->ftm2v;
+  dtfphi = 0.5 * update->dt * force->ftm2v / inertia;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVEGran::initial_integrate()
+{
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **phix = atom->phix;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *rmass = atom->rmass;
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+
+    // translational part: update v from f, then x from the new v
+
+    const double dtm = dtf / rmass[i];
+    for (int d = 0; d < 3; d++) {
+      v[i][d] += dtm * f[i][d];
+      x[i][d] += dtv * v[i][d];
+    }
+
+    // rotational part: update phiv from phia, then phix from the new phiv
+    // the factor carries 1/(radius^2 * mass) on top of dtfphi
+
+    const double dtrot = dtfphi / (radius[i]*radius[i]*rmass[i]);
+    for (int d = 0; d < 3; d++) {
+      phiv[i][d] += dtrot * phia[i][d];
+      phix[i][d] += dtv * phiv[i][d];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVEGran::final_integrate()
+{
+  double **v = atom->v;
+  double **f = atom->f;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *rmass = atom->rmass;
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+
+    // velocity update only: positions are not touched in this half
+
+    const double dtm = dtf / rmass[i];
+    for (int d = 0; d < 3; d++) v[i][d] += dtm * f[i][d];
+
+    // same for the rotational velocity, with the 1/(radius^2 * mass) factor
+
+    const double dtrot = dtfphi / (radius[i]*radius[i]*rmass[i]);
+    for (int d = 0; d < 3; d++) phiv[i][d] += dtrot * phia[i][d];
+  }
+}
--- a/src/GRANULAR/fix_nve_gran.h
+++ b/src/GRANULAR/fix_nve_gran.h
@ -0,0 +1,32 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FIX_NVE_GRAN_H
+#define FIX_NVE_GRAN_H
+
+#include "fix.h"
+
+class FixNVEGran : public Fix {
+ public:
+  FixNVEGran(int, char **);
+  ~FixNVEGran() {}
+
+  int setmask();
+  void init();
+  void initial_integrate();
+  void final_integrate();
+
+ private:
+  double dtv;        // multiplies velocities when advancing positions
+  double dtf;        // multiplies f/rmass when advancing velocities
+  double dtfphi;     // rotational counterpart of dtf
+};
+
+#endif
--- a/src/GRANULAR/fix_shear_history.cpp
+++ b/src/GRANULAR/fix_shear_history.cpp
@ -0,0 +1,282 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "stdio.h"
+#include "fix_shear_history.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "force.h"
+#include "update.h"
+#include "modify.h"
+#include "memory.h"
+#include "error.h"
+
+#define MAXTOUCH 15
+
+/* ---------------------------------------------------------------------- */
+
+FixShearHistory::FixShearHistory(int narg, char **arg) : Fix(narg, arg)
+{
+  restart_peratom = 1;
+
+  // start from empty per-atom storage, size it to atom->nmax,
+  // then register callbacks 0 and 1 with the atom class
+
+  npartner = NULL;
+  partner = NULL;
+  shearpartner = NULL;
+  grow_arrays(atom->nmax);
+  for (int icallback = 0; icallback < 2; icallback++)
+    atom->add_callback(icallback);
+
+  // every owned atom starts with zero partners
+  // so neighbor list creation is OK the 1st time
+
+  const int nowned = atom->nlocal;
+  int i = 0;
+  while (i < nowned) npartner[i++] = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixShearHistory::~FixShearHistory()
+{
+  // unregister callbacks 0 and 1, each only if the atom class still exists
+
+  for (int icallback = 0; icallback < 2; icallback++)
+    if (atom) atom->delete_callback(id,icallback);
+
+  // release the per-atom arrays owned by this fix
+
+  memory->sfree(npartner);
+  memory->destroy_2d_int_array(partner);
+  memory->destroy_3d_double_array(shearpartner);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixShearHistory::setmask()
+{
+  // only the pre_exchange() hook is requested
+
+  return PRE_EXCHANGE;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixShearHistory::init()
+{
+  // partners are recorded by atom tag, so tags must be enabled
+
+  if (!atom->tag_enable)
+    error->all("Pair style granular with history requires atoms have IDs");
+}
+
+/* ----------------------------------------------------------------------
+   copy shear partner info from neighbor lists to atom arrays
+   so can be exchanged with atoms
+------------------------------------------------------------------------- */
+
+void FixShearHistory::pre_exchange()
+{
+  // rebuild npartner/partner/shearpartner from the touch flags and shear
+  // values stored alongside the neighbor list
+
+  int i,j,k,m;
+
+  // zero npartners for all current atoms
+
+  int nlocal = atom->nlocal;
+  for (i = 0; i < nlocal; i++) npartner[i] = 0;
+
+  // copy shear info from neighbor list atoms to atom arrays
+  // nlocal = nlocal_neighbor = nlocal when neighbor list last built,
+  //   which might be pre-insert on this step
+
+  int numneigh;
+  int *neighs,*touch;
+  double *firstshear,*shear;
+  int *tag = atom->tag;
+  nlocal = neighbor->nlocal_neighbor;
+
+  for (i = 0; i < nlocal; i++) {
+    neighs = neighbor->firstneigh[i];
+    touch = neighbor->firsttouch[i];
+    firstshear = neighbor->firstshear[i];
+    numneigh = neighbor->numneigh[i];
+    for (k = 0; k < numneigh; k++) {
+      if (touch[k]) {
+	// shear values are packed 3 per neighbor
+	shear = &firstshear[3*k];
+	j = neighs[k];
+	// store partner by tag, but only while a slot is free
+	if (npartner[i] < MAXTOUCH) {
+	  m = npartner[i];
+	  partner[i][m] = tag[j];
+	  shearpartner[i][m][0] = shear[0];
+	  shearpartner[i][m][1] = shear[1];
+	  shearpartner[i][m][2] = shear[2];
+	}
+	// count is incremented even when nothing was stored,
+	// so the overflow test below can detect it
+	npartner[i]++;
+	// mirror the contact onto j with the shear sign flipped,
+	// only when j indexes one of the first nlocal atoms
+	if (j < nlocal) {
+	  if (npartner[j] < MAXTOUCH) {
+	    m = npartner[j];
+	    partner[j][m] = tag[i];
+	    shearpartner[j][m][0] = -shear[0];
+	    shearpartner[j][m][1] = -shear[1];
+	    shearpartner[j][m][2] = -shear[2];
+	  }
+	  npartner[j]++;
+	}
+      }
+    }
+  }
+
+  // test for too many touching neighbors
+  // the test is >=, so a count of exactly MAXTOUCH is also treated as an error
+  // flags are summed over all procs so every proc takes the same branch
+
+  int flag = 0;
+  for (i = 0; i < nlocal; i++)
+    if (npartner[i] >= MAXTOUCH) flag = 1;
+  int flag_all;
+  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
+  if (flag_all) error->all("Too many touching neighbors - boost MAXTOUCH");
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+int FixShearHistory::memory_usage()
+{
+  const int nmax = atom->nmax;
+
+  // npartner: 1 int per atom
+  // partner: MAXTOUCH ints per atom
+  // shearpartner: MAXTOUCH*3 doubles per atom
+
+  return static_cast<int> (nmax * sizeof(int) +
+                           nmax*MAXTOUCH * sizeof(int) +
+                           nmax*MAXTOUCH*3 * sizeof(double));
+}
+
+/* ----------------------------------------------------------------------
+   allocate local atom-based arrays
+------------------------------------------------------------------------- */
+
+void FixShearHistory::grow_arrays(int nmax)
+{
+  // npartner: one count per atom
+
+  npartner = (int *)
+    memory->srealloc(npartner,nmax*sizeof(int),"shear_history:npartner");
+
+  // partner: MAXTOUCH entries per atom
+
+  partner =
+    memory->grow_2d_int_array(partner,nmax,MAXTOUCH,"shear_history:partner");
+
+  // shearpartner: 3 values for each of the MAXTOUCH entries per atom
+
+  shearpartner = memory->grow_3d_double_array(shearpartner,nmax,MAXTOUCH,3,
+                                              "shear_history:shearpartner");
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based arrays
+------------------------------------------------------------------------- */
+
+void FixShearHistory::copy_arrays(int i, int j)
+{
+  // only the first npartner[i] entries are copied from atom i to atom j
+
+  const int count = npartner[i];
+  npartner[j] = count;
+
+  for (int k = 0; k < count; k++) {
+    partner[j][k] = partner[i][k];
+    for (int d = 0; d < 3; d++)
+      shearpartner[j][k][d] = shearpartner[i][k][d];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixShearHistory::pack_exchange(int i, double *buf)
+{
+  // layout: partner count, then (partner, shear x, shear y, shear z)
+  // for each partner; returns the number of values written
+
+  const int count = npartner[i];
+  double *out = buf;
+
+  *out++ = count;
+  for (int k = 0; k < count; k++) {
+    *out++ = partner[i][k];
+    for (int d = 0; d < 3; d++) *out++ = shearpartner[i][k][d];
+  }
+  return static_cast<int> (out - buf);
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based arrays from exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixShearHistory::unpack_exchange(int nlocal, double *buf)
+{
+  // reads the layout written by pack_exchange() into slot nlocal
+  // returns the number of values consumed
+
+  const double *in = buf;
+  const int count = static_cast<int> (*in++);
+  npartner[nlocal] = count;
+
+  for (int k = 0; k < count; k++) {
+    partner[nlocal][k] = static_cast<int> (*in++);
+    for (int d = 0; d < 3; d++) shearpartner[nlocal][k][d] = *in++;
+  }
+  return static_cast<int> (in - buf);
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for restart file
+------------------------------------------------------------------------- */
+
+int FixShearHistory::pack_restart(int i, double *buf)
+{
+  // layout: total length (4 per partner + 2), partner count,
+  // then (partner, shear x, shear y, shear z) for each partner
+
+  const int count = npartner[i];
+  double *out = buf;
+
+  *out++ = 4*count + 2;
+  *out++ = count;
+  for (int k = 0; k < count; k++) {
+    *out++ = partner[i][k];
+    for (int d = 0; d < 3; d++) *out++ = shearpartner[i][k][d];
+  }
+  return static_cast<int> (out - buf);
+}
+
+/* ----------------------------------------------------------------------
+   unpack values from atom->extra array to restart the fix
+------------------------------------------------------------------------- */
+
+void FixShearHistory::unpack_restart(int nlocal, int nth)
+{
+  const double *record = atom->extra[nlocal];
+
+  // hop over the first nth sets: each set starts with its own length
+
+  int pos = 0;
+  for (int iset = 0; iset < nth; iset++)
+    pos += static_cast<int> (record[pos]);
+
+  // step past the length entry of the set being read
+
+  pos++;
+
+  const int count = static_cast<int> (record[pos++]);
+  npartner[nlocal] = count;
+
+  for (int k = 0; k < count; k++) {
+    partner[nlocal][k] = static_cast<int> (record[pos++]);
+    for (int d = 0; d < 3; d++) shearpartner[nlocal][k][d] = record[pos++];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   maxsize of any atom's restart data
+------------------------------------------------------------------------- */
+
+int FixShearHistory::maxsize_restart()
+{
+  // 2 leading entries plus 4 values for each of MAXTOUCH partners
+
+  return 2 + 4*MAXTOUCH;
+}
+
+/* ----------------------------------------------------------------------
+   size of atom nlocal's restart data
+------------------------------------------------------------------------- */
+
+int FixShearHistory::size_restart(int nlocal)
+{
+  // 2 leading entries plus 4 values per stored partner of atom nlocal
+
+  const int count = npartner[nlocal];
+  return 2 + 4*count;
+}
--- a/src/GRANULAR/fix_wall_gran.cpp
+++ b/src/GRANULAR/fix_wall_gran.cpp
@ -0,0 +1,750 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdlib.h"
+#include "string.h"
+#include "fix_wall_gran.h"
+#include "atom.h"
+#include "update.h"
+#include "force.h"
+#include "modify.h"
+#include "pair_gran_no_history.h"
+#include "pair_gran_history.h"
+#include "pair_gran_hertzian.h"
+#include "memory.h"
+#include "error.h"
+
+#define XPLANE    1
+#define YPLANE    2
+#define ZPLANE    3
+#define ZCYLINDER 4
+
+#define NO_HISTORY 1
+#define HISTORY    2
+#define HERTZIAN   3
+
+#define BIG 1.0e20
+
+// the whole conditional is parenthesized so each macro acts as one operand:
+// MIN(a,b) / c must divide the selected value, not only the second argument
+
+#define MIN(A,B) (((A) < (B)) ? (A) : (B))
+#define MAX(A,B) (((A) > (B)) ? (A) : (B))
+
+/* ---------------------------------------------------------------------- */
+
+FixWallGran::FixWallGran(int narg, char **arg) : Fix(narg, arg)
+{
+  // arg[3] selects the wall style:
+  //   xplane/yplane/zplane: lo hi gamman xmu   (lo or hi may be "NULL")
+  //   zcylinder:            radius gamman xmu
+  // optional trailing keyword: wiggle x|y|z amplitude period
+
+  if (narg < 4) error->all("Illegal fix wall/gran command");
+  
+  if (atom->check_style("granular") == 0)
+    error->all("Must use fix wall/gran with atom style granular");
+
+  restart_peratom = 1;
+
+  // an unrecognized style is rejected here; previously it fell through
+  // with wallstyle and iarg never set
+  // like the checks above, this relies on error->all() not returning
+
+  if (strcmp(arg[3],"xplane") == 0) wallstyle = XPLANE;
+  else if (strcmp(arg[3],"yplane") == 0) wallstyle = YPLANE;
+  else if (strcmp(arg[3],"zplane") == 0) wallstyle = ZPLANE;
+  else if (strcmp(arg[3],"zcylinder") == 0) wallstyle = ZCYLINDER;
+  else error->all("Illegal fix wall/gran command");
+
+  // iarg = index of first optional argument after the required ones
+
+  int iarg = narg;
+  if (wallstyle == ZCYLINDER) {
+    iarg = 7;
+    if (narg < iarg) error->all("Illegal fix wall/gran command");
+    lo = hi = 0.0;
+    cylradius = atof(arg[4]);
+    gamman = atof(arg[5]);
+    xmu = atof(arg[6]);
+  } else {
+    // the three plane styles share one argument layout
+    // "NULL" turns a side off by pushing it out to -BIG or +BIG
+    iarg = 8;
+    if (narg < iarg) error->all("Illegal fix wall/gran command");
+    if (strcmp(arg[4],"NULL") == 0) lo = -BIG;
+    else lo = atof(arg[4]);
+    if (strcmp(arg[5],"NULL") == 0) hi = BIG;
+    else hi = atof(arg[5]);
+    gamman = atof(arg[6]);
+    xmu = atof(arg[7]);
+  }
+  
+  // check for trailing keyword/values
+
+  wiggle = 0;
+
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"wiggle") == 0) {
+      if (iarg+4 > narg) error->all("Illegal fix wall/gran command");
+      if (strcmp(arg[iarg+1],"x") == 0) axis = 0;
+      else if (strcmp(arg[iarg+1],"y") == 0) axis = 1;
+      else if (strcmp(arg[iarg+1],"z") == 0) axis = 2;
+      else error->all("Illegal fix wall/gran command");
+      amplitude = atof(arg[iarg+2]);
+      period = atof(arg[iarg+3]);
+      wiggle = 1;
+      iarg += 4;
+    } else error->all("Illegal fix wall/gran command");
+  }
+
+  if (wallstyle == ZCYLINDER && wiggle && axis != 2)
+    error->all("Can only wiggle zcylinder wall in z dim");
+
+  // setup oscillations
+  // time_origin records the timestep at which this fix was created
+
+  if (wiggle) {
+    double PI = 4.0 * atan(1.0);
+    omega = 2.0*PI / period;
+    time_origin = update->ntimestep;
+  }
+
+  // perform initial allocation of atom-based arrays
+  // register with atom class
+
+  shear = NULL;
+  grow_arrays(atom->nmax);
+  atom->add_callback(0);
+  atom->add_callback(1);
+
+  // initialize as if particle is not touching wall
+
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++)
+    shear[i][0] = shear[i][1] = shear[i][2] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixWallGran::~FixWallGran()
+{
+  // unregister callbacks 0 and 1, each only if the atom class still exists
+
+  for (int icallback = 0; icallback < 2; icallback++)
+    if (atom) atom->delete_callback(id,icallback);
+
+  // release the per-atom shear array owned by this fix
+
+  memory->destroy_2d_double_array(shear);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixWallGran::setmask()
+{
+  // only the post_force() hook is requested
+
+  return POST_FORCE;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::init()
+{
+  // probe the granular pair styles in a fixed order and copy the
+  // xkk and xkkt coefficients from the first one that matches
+  // historystyle is 1 for the two styles that use the shear array
+
+  Pair *anypair;
+  if ((anypair = force->pair_match("gran/no_history")) != 0) {
+    PairGranNoHistory *pair = (PairGranNoHistory *) anypair;
+    historystyle = 0;
+    pairstyle = NO_HISTORY;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+  } else if ((anypair = force->pair_match("gran/history")) != 0) {
+    PairGranHistory *pair = (PairGranHistory *) anypair;
+    historystyle = 1;
+    pairstyle = HISTORY;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+  } else if ((anypair = force->pair_match("gran/hertzian")) != 0) {
+    PairGranHertzian *pair = (PairGranHertzian *) anypair;
+    historystyle = 1;
+    pairstyle = HERTZIAN;
+    xkk = pair->xkk;
+    xkkt = pair->xkkt;
+  } else {
+    error->all("Fix wall/gran can only be used with granular pair style");
+  }
+
+  // damping coefficients: normal is gamman/dt, shear is half of that
+
+  dt = update->dt;
+  gamman_dl = gamman/dt;
+  gammas_dl = 0.5*gamman_dl;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::setup()
+{
+  // apply the wall forces once, passing 1 as the vflag argument
+
+  const int vflag = 1;
+  post_force(vflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::post_force(int vflag)
+{
+  // add wall force and torque to every atom of the fix group that is
+  // within one radius of the wall; vflag is not used in this function
+
+  double vwall[3],dx,dy,dz,del1,del2,delxy,delr,rsq;
+
+  // set position of wall to initial settings and velocity to 0.0
+  // if wiggle, set wall position and velocity accordingly
+  // NOTE(review): wlo/whi are shifted whenever wiggle is set, whatever the
+  //   relation between axis and the wall orientation -- confirm intended
+  // NOTE(review): vwall is multiplied by dt here, and the relative velocity
+  //   built from it is multiplied by dt again in no_history()/history()
+  //   -- confirm the scaling
+
+  double wlo = lo;
+  double whi = hi;
+  vwall[0] = vwall[1] = vwall[2] = 0.0;
+  if (wiggle) {
+    double arg = omega * (update->ntimestep - time_origin) * dt;
+    wlo = lo + amplitude - amplitude*cos(arg);
+    whi = hi + amplitude - amplitude*cos(arg);
+    vwall[axis] = dt * amplitude*omega*sin(arg);
+  }
+
+  // loop over all my atoms
+  // rsq = squared distance from wall
+  // dx,dy,dz = signed distance from wall
+  //   plane styles set only the component normal to the wall
+  //   cylinder style sets dx,dy
+  // skip atom if not close enough to wall
+  //   if wall was set to NULL, it's skipped since lo/hi are infinity
+  // compute force and torque on atom if close enough to wall
+  //   via wall potential matched to pair potential
+  // set shear if pair potential stores history
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (mask[i] & groupbit) {
+
+      dx = dy = dz = 0.0;
+
+      // plane styles: use the nearer of the lo and hi walls
+      //   positive = distance above lo, negative = distance below hi
+
+      if (wallstyle == XPLANE) {
+	del1 = x[i][0] - wlo;
+	del2 = whi - x[i][0];
+	if (del1 < del2) dx = del1;
+	else dx = -del2;
+      } else if (wallstyle == YPLANE) {
+	del1 = x[i][1] - wlo;
+	del2 = whi - x[i][1];
+	if (del1 < del2) dy = del1;
+	else dy = -del2;
+      } else if (wallstyle == ZPLANE) {
+	del1 = x[i][2] - wlo;
+	del2 = whi - x[i][2];
+	if (del1 < del2) dz = del1;
+	else dz = -del2;
+      } else if (wallstyle == ZCYLINDER) {
+	// delxy = distance from the z axis, delr = gap to the cylinder wall
+	// wlo/whi are not used for this style
+        delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]);
+	delr = cylradius - delxy;
+	// not touching: dz = cylradius makes rsq exceed radius^2 below,
+	//   since cylradius >= delr > radius[i] in this branch
+	if (delr > radius[i]) dz = cylradius;
+	else {
+	  dx = -delr/delxy * x[i][0];
+	  dy = -delr/delxy * x[i][1];
+	}
+      }
+
+      rsq = dx*dx + dy*dy + dz*dz;
+
+      // no contact: reset the stored shear if the pair style keeps history
+      // contact: dispatch to the routine matching the pair style
+
+      if (rsq > radius[i]*radius[i]) {
+	if (historystyle) {
+	  shear[i][0] = 0.0;
+	  shear[i][1] = 0.0;
+	  shear[i][2] = 0.0;
+	}
+      } else {
+	if (pairstyle == NO_HISTORY)
+	  no_history(rsq,dx,dy,dz,vwall,v[i],f[i],phiv[i],phia[i],
+		     radius[i],rmass[i]);
+	else if (pairstyle == HISTORY)
+	  history(rsq,dx,dy,dz,vwall,v[i],f[i],phiv[i],phia[i],
+		  radius[i],rmass[i],shear[i]);
+	else if (pairstyle == HERTZIAN)
+	  hertzian(rsq,dx,dy,dz,vwall,v[i],f[i],phiv[i],phia[i],
+		   radius[i],rmass[i],shear[i]);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::no_history(double rsq, double dx, double dy, double dz,
+                             double *vwall, double *v,
+                             double *f, double *phiv, double *phia,
+                             double radius, double mass)
+{
+  // wall force and torque on one particle, with no stored shear
+  //   rsq         = squared length of (dx,dy,dz)
+  //   dx,dy,dz    = signed distance of particle from wall
+  //   vwall       = wall velocity
+  //   v,phiv      = particle velocity and rotational velocity (read only)
+  //   f,phia      = particle force and torque accumulators (updated)
+  //   radius,mass = particle radius and mass
+
+  double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3,xmeff,damp,ccel,vtr1,vtr2,vtr3,vrel;
+  double fn,fs,ft,fs1,fs2,fs3,ccelx,ccely,ccelz,tor1,tor2,tor3;
+
+  r = sqrt(rsq);
+
+  // relative translational velocity, scaled by dt
+
+  vr1 = v[0] - vwall[0];
+  vr2 = v[1] - vwall[1];
+  vr3 = v[2] - vwall[2];
+
+  vr1 *= dt;
+  vr2 *= dt;
+  vr3 *= dt;
+
+  // normal component = projection of relative velocity onto (dx,dy,dz)
+
+  vnnr = vr1*dx + vr2*dy + vr3*dz;
+  vn1 = dx*vnnr / rsq;
+  vn2 = dy*vnnr / rsq;
+  vn3 = dz*vnnr / rsq;
+
+  // tangential component
+
+  vt1 = vr1 - vn1;
+  vt2 = vr2 - vn2;
+  vt3 = vr3 - vn3;
+
+  // relative rotational velocity, scaled by dt/r
+
+  wr1 = radius*phiv[0];
+  wr2 = radius*phiv[1];
+  wr3 = radius*phiv[2];
+
+  wr1 *= dt/r;
+  wr2 *= dt/r;
+  wr3 *= dt/r;
+
+  // normal damping term
+  // this definition of DAMP includes the extra 1/r term
+
+  xmeff = mass;
+  damp = xmeff*gamman_dl*vnnr/rsq;
+  ccel = xkk*(radius-r)/r - damp;
+
+  // relative velocities
+
+  vtr1 = vt1 - (dz*wr2-dy*wr3);
+  vtr2 = vt2 - (dx*wr3-dz*wr1);
+  vtr3 = vt3 - (dy*wr1-dx*wr2);
+  vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+  vrel = sqrt(vrel);
+
+  // force normalization
+  // shear force magnitude is capped at fn = xmu * |normal force|
+  // the smaller of fn and fs is selected explicitly before dividing by vrel,
+  //   so the division applies to whichever value was selected
+
+  fn = xmu * fabs(ccel*r);
+  fs = xmeff*gammas_dl*vrel;
+  if (vrel != 0.0) ft = ((fn < fs) ? fn : fs) / vrel;
+  else ft = 0.0;
+
+  // shear friction forces, opposing the relative tangential velocity
+
+  fs1 = -ft*vtr1;
+  fs2 = -ft*vtr2;
+  fs3 = -ft*vtr3;
+
+  // force components
+
+  ccelx = dx*ccel + fs1;
+  ccely = dy*ccel + fs2;
+  ccelz = dz*ccel + fs3;
+
+  // forces
+
+  f[0] += ccelx;
+  f[1] += ccely;
+  f[2] += ccelz;
+
+  // torques = cross product of (dx,dy,dz) with the shear force
+
+  tor1 = dy*fs3 - dz*fs2;
+  tor2 = dz*fs1 - dx*fs3;
+  tor3 = dx*fs2 - dy*fs1;
+  phia[0] -= radius*tor1;
+  phia[1] -= radius*tor2;
+  phia[2] -= radius*tor3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::history(double rsq, double dx, double dy, double dz,
+			  double *vwall, double *v,
+			  double *f, double *phiv, double *phia,
+			  double radius, double mass, double *shear)
+{
+  // wall force and torque on one particle, with stored shear
+  //   rsq         = squared length of (dx,dy,dz)
+  //   dx,dy,dz    = signed distance of particle from wall
+  //   vwall       = wall velocity
+  //   v,phiv      = particle velocity and rotational velocity (read only)
+  //   f,phia      = particle force and torque accumulators (updated)
+  //   radius,mass = particle radius and mass
+  //   shear       = accumulated shear of this particle/wall contact (updated)
+
+  double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3,xmeff,damp,ccel,vtr1,vtr2,vtr3,vrel;
+  double fn,fs,fs1,fs2,fs3,ccelx,ccely,ccelz,tor1,tor2,tor3;
+  double shrmag,rsht;
+
+  r = sqrt(rsq);
+
+  // relative translational velocity, scaled by dt
+
+  vr1 = v[0] - vwall[0];
+  vr2 = v[1] - vwall[1];
+  vr3 = v[2] - vwall[2];
+
+  vr1 *= dt;
+  vr2 *= dt;
+  vr3 *= dt;
+
+  // normal component = projection of relative velocity onto (dx,dy,dz)
+
+  vnnr = vr1*dx + vr2*dy + vr3*dz;
+  vn1 = dx*vnnr / rsq;
+  vn2 = dy*vnnr / rsq;
+  vn3 = dz*vnnr / rsq;
+
+  // tangential component
+
+  vt1 = vr1 - vn1;
+  vt2 = vr2 - vn2;
+  vt3 = vr3 - vn3;
+
+  // relative rotational velocity, scaled by dt/r
+
+  wr1 = radius*phiv[0];
+  wr2 = radius*phiv[1];
+  wr3 = radius*phiv[2];
+
+  wr1 *= dt/r;
+  wr2 *= dt/r;
+  wr3 *= dt/r;
+
+  // normal damping term
+  // this definition of DAMP includes the extra 1/r term
+
+  xmeff = mass;
+  damp = xmeff*gamman_dl*vnnr/rsq;
+  ccel = xkk*(radius-r)/r - damp;
+
+  // relative velocities
+  // vrel is computed here but not read again in this function
+
+  vtr1 = vt1 - (dz*wr2-dy*wr3);
+  vtr2 = vt2 - (dx*wr3-dz*wr1);
+  vtr3 = vt3 - (dy*wr1-dx*wr2);
+  vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+  vrel = sqrt(vrel);
+
+  // shear history effects
+  // accumulate this step's relative tangential motion into shear
+  // shrmag is taken before the projection below is removed
+
+  shear[0] += vtr1;
+  shear[1] += vtr2;
+  shear[2] += vtr3;
+  shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + shear[2]*shear[2]);
+
+  // rotate shear displacements correctly
+  // i.e. subtract the component of shear along (dx,dy,dz)
+
+  rsht = shear[0]*dx + shear[1]*dy + shear[2]*dz;
+  rsht = rsht/rsq;
+  shear[0] -= rsht*dx;
+  shear[1] -= rsht*dy;
+  shear[2] -= rsht*dz;
+
+  // tangential forces
+
+  fs1 = - (xkkt*shear[0] + xmeff*gammas_dl*vtr1);
+  fs2 = - (xkkt*shear[1] + xmeff*gammas_dl*vtr2);
+  fs3 = - (xkkt*shear[2] + xmeff*gammas_dl*vtr3);
+
+  // force normalization
+  // fn = cap on the shear force magnitude, xmu * |normal force|
+
+  fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+  fn = xmu * fabs(ccel*r);
+
+  // shrmag is magnitude of shearwall
+  // rescale frictional displacements and forces if needed
+  // when fs exceeds fn, the force is scaled by fn/fs and shear is reset
+  //   so that the formula above reproduces the scaled force
+  // if no shear had accumulated, the shear force is zeroed instead
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      shear[0] = (fn/fs) * (shear[0] + xmeff*gammas_dl*vtr1/xkkt) - 
+	xmeff*gammas_dl*vtr1/xkkt;
+      shear[1] = (fn/fs) * (shear[1] + xmeff*gammas_dl*vtr2/xkkt) -
+	xmeff*gammas_dl*vtr2/xkkt;
+      shear[2] = (fn/fs) * (shear[2] + xmeff*gammas_dl*vtr3/xkkt) -
+	xmeff*gammas_dl*vtr3/xkkt;
+      fs1 = fs1 * fn / fs ;
+      fs2 = fs2 * fn / fs;
+      fs3 = fs3 * fn / fs;
+    } else fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // force components = normal term along (dx,dy,dz) plus shear force
+
+  ccelx = dx*ccel + fs1;
+  ccely = dy*ccel + fs2;
+  ccelz = dz*ccel + fs3;
+
+  // forces
+
+  f[0] += ccelx;
+  f[1] += ccely;
+  f[2] += ccelz;
+
+  // torques = cross product of (dx,dy,dz) with the shear force
+
+  tor1 = dy*fs3 - dz*fs2;
+  tor2 = dz*fs1 - dx*fs3;
+  tor3 = dx*fs2 - dy*fs1;
+  phia[0] -= radius*tor1;
+  phia[1] -= radius*tor2;
+  phia[2] -= radius*tor3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGran::hertzian(double rsq, double dx, double dy, double dz,
+                           double *vwall, double *v,
+                           double *f, double *phiv, double *phia,
+                           double radius, double mass, double *shear)
+{
+  // particle-wall contact force with shear history, Hertzian variant
+  // same steps as history(), except that the normal and tangential forces
+  //   are both multiplied by rhertz = sqrt(radius - r)
+  // rsq = squared length of (dx,dy,dz), the vector used as contact normal
+  //   (presumably wall contact point to particle center - confirm in caller)
+  // vwall,v = wall and particle velocities, phiv = particle rotational velocity
+  // f,phia = accumulated in place with the resulting force and torque terms
+  // shear = 3 stored shear displacements for this contact, updated in place
+
+  double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3,xmeff,damp,ccel,vtr1,vtr2,vtr3;
+  double fn,fs,fs1,fs2,fs3,ccelx,ccely,ccelz,tor1,tor2,tor3;
+  double shrmag,rsht,rhertz;
+
+  r = sqrt(rsq);
+
+  // relative translational velocity
+  // scaled by dt, so it is a displacement over one timestep
+
+  vr1 = v[0] - vwall[0];
+  vr2 = v[1] - vwall[1];
+  vr3 = v[2] - vwall[2];
+
+  vr1 *= dt;
+  vr2 *= dt;
+  vr3 *= dt;
+
+  // normal component
+
+  vnnr = vr1*dx + vr2*dy + vr3*dz;
+  vn1 = dx*vnnr / rsq;
+  vn2 = dy*vnnr / rsq;
+  vn3 = dz*vnnr / rsq;
+
+  // tangential component
+
+  vt1 = vr1 - vn1;
+  vt2 = vr2 - vn2;
+  vt3 = vr3 - vn3;
+
+  // relative rotational velocity
+
+  wr1 = radius*phiv[0];
+  wr2 = radius*phiv[1];
+  wr3 = radius*phiv[2];
+
+  wr1 *= dt/r;
+  wr2 *= dt/r;
+  wr3 *= dt/r;
+
+  // normal damping term
+  // this definition of DAMP includes the extra 1/r term
+  // rhertz is real only while r <= radius
+  // NOTE(review): assumes the caller only passes overlapping contacts
+
+  xmeff = mass;
+  damp = xmeff*gamman_dl*vnnr/rsq;
+  ccel = xkk*(radius-r)/r - damp;
+  rhertz = sqrt(radius - r);
+  ccel = rhertz * ccel;
+
+  // relative tangential velocities at the contact, including rotation
+
+  vtr1 = vt1 - (dz*wr2-dy*wr3);
+  vtr2 = vt2 - (dx*wr3-dz*wr1);
+  vtr3 = vt3 - (dy*wr1-dx*wr2);
+
+  // shear history effects
+  // shrmag is taken before the projection below
+  // it is only used to test for a zero shear vector
+
+  shear[0] += vtr1;
+  shear[1] += vtr2;
+  shear[2] += vtr3;
+  shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + shear[2]*shear[2]);
+
+  // rotate shear displacements correctly
+  // i.e. remove the component of shear along (dx,dy,dz)
+
+  rsht = shear[0]*dx + shear[1]*dy + shear[2]*dz;
+  rsht = rsht/rsq;
+  shear[0] -= rsht*dx;
+  shear[1] -= rsht*dy;
+  shear[2] -= rsht*dz;
+
+  // tangential forces
+
+  fs1 = -rhertz * (xkkt*shear[0] + xmeff*gammas_dl*vtr1);
+  fs2 = -rhertz * (xkkt*shear[1] + xmeff*gammas_dl*vtr2);
+  fs3 = -rhertz * (xkkt*shear[2] + xmeff*gammas_dl*vtr3);
+
+  // force normalization
+  // fs = tangential force magnitude, fn = largest tangential force allowed
+
+  fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+  fn = xmu * fabs(ccel*r);
+
+  // rescale frictional displacements and forces if needed
+  // shear is reset so that the force recomputed from it has magnitude fn
+  // fs > fn >= 0 inside this branch, so dividing by fs is safe
+
+  if (fs > fn) {
+    if (shrmag != 0.0) {
+      shear[0] = (fn/fs) * (shear[0] + xmeff*gammas_dl*vtr1/xkkt) -
+        xmeff*gammas_dl*vtr1/xkkt;
+      shear[1] = (fn/fs) * (shear[1] + xmeff*gammas_dl*vtr2/xkkt) -
+        xmeff*gammas_dl*vtr2/xkkt;
+      shear[2] = (fn/fs) * (shear[2] + xmeff*gammas_dl*vtr3/xkkt) -
+        xmeff*gammas_dl*vtr3/xkkt;
+      fs1 = fs1 * fn / fs;
+      fs2 = fs2 * fn / fs;
+      fs3 = fs3 * fn / fs;
+    } else fs1 = fs2 = fs3 = 0.0;
+  }
+
+  // total force = normal part along (dx,dy,dz) plus tangential part
+
+  ccelx = dx*ccel + fs1;
+  ccely = dy*ccel + fs2;
+  ccelz = dz*ccel + fs3;
+
+  // forces
+
+  f[0] += ccelx;
+  f[1] += ccely;
+  f[2] += ccelz;
+
+  // torques
+  // cross product of (dx,dy,dz) with the tangential force, scaled by radius
+
+  tor1 = dy*fs3 - dz*fs2;
+  tor2 = dz*fs1 - dx*fs3;
+  tor3 = dx*fs2 - dy*fs1;
+  phia[0] -= radius*tor1;
+  phia[1] -= radius*tor2;
+  phia[2] -= radius*tor3;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays 
+------------------------------------------------------------------------- */
+
+int FixWallGran::memory_usage()
+{
+  // counts one int and three doubles for each of the atom->nmax slots
+
+  const int nslots = atom->nmax;
+  return static_cast<int> (nslots * sizeof(int) + 3*nslots * sizeof(double));
+}
+
+/* ----------------------------------------------------------------------
+   allocate local atom-based arrays 
+------------------------------------------------------------------------- */
+
+void FixWallGran::grow_arrays(int nmax)
+{
+  // shear stores 3 doubles per atom; resize it to nmax rows
+
+  double **resized =
+    memory->grow_2d_double_array(shear,nmax,3,"fix_wall_gran:shear");
+  shear = resized;
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based arrays 
+------------------------------------------------------------------------- */
+
+void FixWallGran::copy_arrays(int i, int j)
+{
+  // copy the 3 shear values of atom i into the slot of atom j
+
+  for (int dim = 0; dim < 3; dim++) shear[j][dim] = shear[i][dim];
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for exchange with another proc 
+------------------------------------------------------------------------- */
+
+int FixWallGran::pack_exchange(int i, double *buf)
+{
+  // write atom i's 3 shear values to the front of buf
+  // return the number of values written
+
+  const int nvalues = 3;
+  for (int dim = 0; dim < nvalues; dim++) buf[dim] = shear[i][dim];
+  return nvalues;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values into local atom-based arrays after exchange 
+------------------------------------------------------------------------- */
+
+int FixWallGran::unpack_exchange(int nlocal, double *buf)
+{
+  // read 3 shear values from the front of buf into slot nlocal
+  // return the number of values consumed
+
+  const int nvalues = 3;
+  for (int dim = 0; dim < nvalues; dim++) shear[nlocal][dim] = buf[dim];
+  return nvalues;
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for restart file 
+------------------------------------------------------------------------- */
+
+int FixWallGran::pack_restart(int i, double *buf)
+{
+  // record layout: [size, shear x, shear y, shear z]
+  // the leading size entry counts itself, so it equals the return value
+
+  const int nvalues = 4;
+  buf[0] = nvalues;
+  for (int dim = 0; dim < 3; dim++) buf[dim+1] = shear[i][dim];
+  return nvalues;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values from atom->extra array to restart the fix 
+------------------------------------------------------------------------- */
+
+void FixWallGran::unpack_restart(int nlocal, int nth)
+{
+  // extra[nlocal] is a sequence of records that each begin with their
+  //   own length; this fix's record is the one at position nth
+
+  double *values = atom->extra[nlocal];
+
+  // hop over the nth preceding records using their leading size entries
+
+  int offset = 0;
+  for (int skipped = 0; skipped < nth; skipped++)
+    offset += static_cast<int> (values[offset]);
+
+  // step past the size entry of this fix's own record
+
+  offset++;
+
+  for (int dim = 0; dim < 3; dim++) shear[nlocal][dim] = values[offset+dim];
+}
+
+/* ----------------------------------------------------------------------
+   maxsize of any atom's restart data 
+------------------------------------------------------------------------- */
+
+int FixWallGran::maxsize_restart()
+{
+  // leading size entry + 3 shear components, as written by pack_restart()
+
+  const int nvalues = 1 + 3;
+  return nvalues;
+}
+
+/* ----------------------------------------------------------------------
+   size of atom nlocal's restart data 
+------------------------------------------------------------------------- */
+
+int FixWallGran::size_restart(int nlocal)
+{
+  // every atom stores the same record: size entry + 3 shear components
+  // nlocal is not needed to compute it
+
+  const int nvalues = 1 + 3;
+  return nvalues;
+}
--- a/src/GRANULAR/fix_wall_gran.h
+++ b/src/GRANULAR/fix_wall_gran.h
@ -0,0 +1,58 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FIX_WALL_GRAN_H
+#define FIX_WALL_GRAN_H
+
+#include "fix.h"
+
+class FixWallGran : public Fix {
+ public:
+  FixWallGran(int, char **);
+  ~FixWallGran();
+  int setmask();
+  void init();
+  void setup();
+  void post_force(int);
+
+  // per-atom array management, exchange and restart of the shear history
+
+  int memory_usage();
+  void grow_arrays(int nmax);
+  void copy_arrays(int i, int j);
+  int pack_exchange(int i, double *buf);
+  int unpack_exchange(int nlocal, double *buf);
+  int pack_restart(int i, double *buf);
+  void unpack_restart(int nlocal, int nth);
+  int size_restart(int nlocal);
+  int maxsize_restart();
+
+ private:
+  int wallstyle;
+  int pairstyle;
+  int historystyle;
+  int wiggle;
+  int axis;
+
+  // force-law coefficients read by history() and hertzian()
+
+  double xkk;
+  double xkkt;
+  double gamman;
+  double xmu;
+
+  double lo;
+  double hi;
+  double cylradius;
+
+  double dt;
+  double gamman_dl;
+  double gammas_dl;
+
+  double amplitude;
+  double period;
+  double omega;
+  double time_origin;
+
+  int *touch;
+  double **shear;          // 3 shear values per atom, sized by grow_arrays()
+
+  // particle-wall force kernels
+
+  void no_history(double, double, double, double, double *,
+                  double *, double *, double *, double *, double, double);
+  void history(double rsq, double dx, double dy, double dz, double *vwall,
+               double *v, double *f, double *phiv, double *phia,
+               double radius, double mass, double *shear);
+  void hertzian(double rsq, double dx, double dy, double dz, double *vwall,
+                double *v, double *f, double *phiv, double *phia,
+                double radius, double mass, double *shear);
+};
+
+#endif
--- a/src/GRANULAR/pair_gran_hertzian.cpp
+++ b/src/GRANULAR/pair_gran_hertzian.cpp
@ -0,0 +1,220 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "string.h"
+#include "pair_gran_hertzian.h"
+#include "atom.h"
+#include "force.h"
+#include "neighbor.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHertzian::compute(int eflag, int vflag)
+{
+  // pairwise granular contact forces with shear history, Hertzian variant
+  // for each neighbor pair closer than the sum of the two radii:
+  //   normal force = spring on the overlap minus velocity damping
+  //   tangential force = spring on the stored shear plus damping,
+  //     limited to xmu times the magnitude of the normal force
+  //   both are multiplied by rhertz = sqrt(radsum - r)
+  // pairs that are not overlapping have their touch flag and shear cleared
+  // eflag and vflag are not used here
+
+  int i,j,k,numneigh;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double radi,radj,radsum,rsq,r,rinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3;
+  double xmeff,damp,ccel,ccelx,ccely,ccelz,tor1,tor2,tor3;
+  double fn,fs,fs1,fs2,fs3;
+  double shrmag,rsht,rhertz;
+  int *neighs,*touch;
+  double *firstshear,*shear;
+
+  double **f = atom->f;
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+  // touch[k] and firstshear[3*k..3*k+2] belong to the k-th neighbor of atom i
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    neighs = neighbor->firstneigh[i];
+    touch = neighbor->firsttouch[i];
+    firstshear = neighbor->firstshear[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq >= radsum*radsum) {
+
+        // unset touching neighbors
+
+        touch[k] = 0;
+        shear = &firstshear[3*k];
+        shear[0] = 0.0;
+        shear[1] = 0.0;
+        shear[2] = 0.0;
+
+      } else {
+        r = sqrt(rsq);
+
+        // relative translational velocity
+        // scaled by dt, so it is a displacement over one timestep
+
+        vr1 = v[i][0] - v[j][0];
+        vr2 = v[i][1] - v[j][1];
+        vr3 = v[i][2] - v[j][2];
+
+        vr1 *= dt;
+        vr2 *= dt;
+        vr3 *= dt;
+
+        // normal component
+
+        vnnr = vr1*delx + vr2*dely + vr3*delz;
+        vn1 = delx*vnnr / rsq;
+        vn2 = dely*vnnr / rsq;
+        vn3 = delz*vnnr / rsq;
+
+        // tangential component
+
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // relative rotational velocity
+
+        wr1 = radi*phiv[i][0] + radj*phiv[j][0];
+        wr2 = radi*phiv[i][1] + radj*phiv[j][1];
+        wr3 = radi*phiv[i][2] + radj*phiv[j][2];
+
+        wr1 *= dt/r;
+        wr2 *= dt/r;
+        wr3 *= dt/r;
+
+        // normal damping term
+        // this definition of DAMP includes the extra 1/r term
+        // if either atom is in the freeze group,
+        //   the effective mass is the other atom's mass
+        // radsum > r in this branch, so rhertz is real
+
+        xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+        if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+        if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+        damp = xmeff*gamman_dl*vnnr/rsq;
+        ccel = xkk*(radsum-r)/r - damp;
+        rhertz = sqrt(radsum - r);
+        ccel = rhertz * ccel;
+
+        // relative tangential velocities at the contact, including rotation
+
+        vtr1 = vt1 - (delz*wr2-dely*wr3);
+        vtr2 = vt2 - (delx*wr3-delz*wr1);
+        vtr3 = vt3 - (dely*wr1-delx*wr2);
+
+        // shear history effects
+        // shrmag = magnitude of shear, taken before the projection below
+        // it is only used to test for a zero shear vector
+
+        touch[k] = 1;
+        shear = &firstshear[3*k];
+        shear[0] += vtr1;
+        shear[1] += vtr2;
+        shear[2] += vtr3;
+        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
+                      shear[2]*shear[2]);
+
+        // rotate shear displacements correctly
+        // i.e. remove the component of shear along (delx,dely,delz)
+
+        rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+        rsht /= rsq;
+        shear[0] -= rsht*delx;
+        shear[1] -= rsht*dely;
+        shear[2] -= rsht*delz;
+
+        // tangential forces
+
+        fs1 = -rhertz * (xkkt*shear[0] + xmeff*gammas_dl*vtr1);
+        fs2 = -rhertz * (xkkt*shear[1] + xmeff*gammas_dl*vtr2);
+        fs3 = -rhertz * (xkkt*shear[2] + xmeff*gammas_dl*vtr3);
+
+        // force normalization
+        // rescale frictional displacements and forces if needed
+        // fs > fn >= 0 inside the branch, so dividing by fs is safe
+
+        fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+        fn = xmu * fabs(ccel*r);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] = (fn/fs) * (shear[0] + xmeff*gammas_dl*vtr1/xkkt) -
+              xmeff*gammas_dl*vtr1/xkkt;
+            shear[1] = (fn/fs) * (shear[1] + xmeff*gammas_dl*vtr2/xkkt) -
+              xmeff*gammas_dl*vtr2/xkkt;
+            shear[2] = (fn/fs) * (shear[2] + xmeff*gammas_dl*vtr3/xkkt) -
+              xmeff*gammas_dl*vtr3/xkkt;
+            fs1 *= fn/fs;
+            fs2 *= fn/fs;
+            fs3 *= fn/fs;
+          } else {
+            fs1 = 0.0;
+            fs2 = 0.0;
+            fs3 = 0.0;
+          }
+        }
+
+        // forces & torques
+
+        ccelx = delx*ccel + fs1;
+        ccely = dely*ccel + fs2;
+        ccelz = delz*ccel + fs3;
+        f[i][0] += ccelx;
+        f[i][1] += ccely;
+        f[i][2] += ccelz;
+
+        rinv = 1/r;
+        tor1 = rinv * (dely*fs3 - delz*fs2);
+        tor2 = rinv * (delz*fs1 - delx*fs3);
+        tor3 = rinv * (delx*fs2 - dely*fs1);
+        phia[i][0] -= radi*tor1;
+        phia[i][1] -= radi*tor2;
+        phia[i][2] -= radi*tor3;
+
+        // atom j gets the opposite force and a torque term of the same sign
+        // skipped when newton_pair is off and j is not a local atom
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= ccelx;
+          f[j][1] -= ccely;
+          f[j][2] -= ccelz;
+          phia[j][0] -= radj*tor1;
+          phia[j][1] -= radj*tor2;
+          phia[j][2] -= radj*tor3;
+        }
+      }
+    }
+  }
+}
--- a/src/GRANULAR/pair_gran_hertzian.h
+++ b/src/GRANULAR/pair_gran_hertzian.h
@ -0,0 +1,24 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_GRAN_HERTZIAN_H
+#define PAIR_GRAN_HERTZIAN_H
+
+#include "pair_gran_history.h"
+
+class PairGranHertzian : public PairGranHistory {
+ public:
+  // replaces PairGranHistory::compute(), which is declared virtual
+  // everything else is inherited
+
+  void compute(int eflag, int vflag);
+};
+
+#endif
--- a/src/GRANULAR/pair_gran_history.cpp
+++ b/src/GRANULAR/pair_gran_history.cpp
@ -0,0 +1,454 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_gran_history.h"
+#include "atom.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "modify.h"
+#include "fix.h"
+#include "fix_insert.h"
+#include "comm.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHistory::PairGranHistory()
+{
+  // -1 = no SHEAR_HISTORY fix has been located yet
+
+  ifix_history = -1;
+  single_enable = 0;
+
+  // clear all 6 virial components
+
+  for (int icomp = 0; icomp < 6; icomp++)
+    virial[icomp] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHistory::~PairGranHistory()
+{
+  // ifix_history is >= 0 only after init_style() has located the fix
+
+  const bool have_history_fix = (ifix_history >= 0);
+  if (have_history_fix) modify->delete_fix("SHEAR_HISTORY");
+
+  // the type-pair arrays exist only if allocate() has run
+
+  if (!allocated) return;
+  memory->destroy_2d_int_array(setflag);
+  memory->destroy_2d_double_array(cutsq);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHistory::compute(int eflag, int vflag)
+{
+  // pairwise granular contact forces with shear history
+  // for each neighbor pair closer than the sum of the two radii:
+  //   normal force = spring on the overlap minus velocity damping
+  //   tangential force = spring on the stored shear plus damping,
+  //     limited to xmu times the magnitude of the normal force
+  // pairs that are not overlapping have their touch flag and shear cleared
+  // eflag and vflag are not used here
+
+  int i,j,k,numneigh;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double radi,radj,radsum,rsq,r,rinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3;
+  double xmeff,damp,ccel,ccelx,ccely,ccelz,tor1,tor2,tor3;
+  double fn,fs,fs1,fs2,fs3;
+  double shrmag,rsht;
+  int *neighs,*touch;
+  double *firstshear,*shear;
+
+  double **f = atom->f;
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+  // touch[k] and firstshear[3*k..3*k+2] belong to the k-th neighbor of atom i
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    neighs = neighbor->firstneigh[i];
+    touch = neighbor->firsttouch[i];
+    firstshear = neighbor->firstshear[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq >= radsum*radsum) {
+
+        // unset touching neighbors
+
+        touch[k] = 0;
+        shear = &firstshear[3*k];
+        shear[0] = 0.0;
+        shear[1] = 0.0;
+        shear[2] = 0.0;
+
+      } else {
+        r = sqrt(rsq);
+
+        // relative translational velocity
+        // scaled by dt, so it is a displacement over one timestep
+
+        vr1 = v[i][0] - v[j][0];
+        vr2 = v[i][1] - v[j][1];
+        vr3 = v[i][2] - v[j][2];
+
+        vr1 *= dt;
+        vr2 *= dt;
+        vr3 *= dt;
+
+        // normal component
+
+        vnnr = vr1*delx + vr2*dely + vr3*delz;
+        vn1 = delx*vnnr / rsq;
+        vn2 = dely*vnnr / rsq;
+        vn3 = delz*vnnr / rsq;
+
+        // tangential component
+
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // relative rotational velocity
+
+        wr1 = radi*phiv[i][0] + radj*phiv[j][0];
+        wr2 = radi*phiv[i][1] + radj*phiv[j][1];
+        wr3 = radi*phiv[i][2] + radj*phiv[j][2];
+
+        wr1 *= dt/r;
+        wr2 *= dt/r;
+        wr3 *= dt/r;
+
+        // normal damping term
+        // this definition of DAMP includes the extra 1/r term
+        // if either atom is in the freeze group,
+        //   the effective mass is the other atom's mass
+
+        xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+        if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+        if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+        damp = xmeff*gamman_dl*vnnr/rsq;
+        ccel = xkk*(radsum-r)/r - damp;
+
+        // relative tangential velocities at the contact, including rotation
+
+        vtr1 = vt1 - (delz*wr2-dely*wr3);
+        vtr2 = vt2 - (delx*wr3-delz*wr1);
+        vtr3 = vt3 - (dely*wr1-delx*wr2);
+
+        // shear history effects
+        // shrmag = magnitude of shear, taken before the projection below
+        // it is only used to test for a zero shear vector
+
+        touch[k] = 1;
+        shear = &firstshear[3*k];
+        shear[0] += vtr1;
+        shear[1] += vtr2;
+        shear[2] += vtr3;
+        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
+                      shear[2]*shear[2]);
+
+        // rotate shear displacements correctly
+        // i.e. remove the component of shear along (delx,dely,delz)
+
+        rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+        rsht /= rsq;
+        shear[0] -= rsht*delx;
+        shear[1] -= rsht*dely;
+        shear[2] -= rsht*delz;
+
+        // tangential forces
+
+        fs1 = - (xkkt*shear[0] + xmeff*gammas_dl*vtr1);
+        fs2 = - (xkkt*shear[1] + xmeff*gammas_dl*vtr2);
+        fs3 = - (xkkt*shear[2] + xmeff*gammas_dl*vtr3);
+
+        // force normalization
+        // rescale frictional displacements and forces if needed
+        // fs > fn >= 0 inside the branch, so dividing by fs is safe
+
+        fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+        fn = xmu * fabs(ccel*r);
+
+        if (fs > fn) {
+          if (shrmag != 0.0) {
+            shear[0] = (fn/fs) * (shear[0] + xmeff*gammas_dl*vtr1/xkkt) -
+              xmeff*gammas_dl*vtr1/xkkt;
+            shear[1] = (fn/fs) * (shear[1] + xmeff*gammas_dl*vtr2/xkkt) -
+              xmeff*gammas_dl*vtr2/xkkt;
+            shear[2] = (fn/fs) * (shear[2] + xmeff*gammas_dl*vtr3/xkkt) -
+              xmeff*gammas_dl*vtr3/xkkt;
+            fs1 *= fn/fs;
+            fs2 *= fn/fs;
+            fs3 *= fn/fs;
+          } else {
+            fs1 = 0.0;
+            fs2 = 0.0;
+            fs3 = 0.0;
+          }
+        }
+
+        // forces & torques
+
+        ccelx = delx*ccel + fs1;
+        ccely = dely*ccel + fs2;
+        ccelz = delz*ccel + fs3;
+        f[i][0] += ccelx;
+        f[i][1] += ccely;
+        f[i][2] += ccelz;
+
+        rinv = 1/r;
+        tor1 = rinv * (dely*fs3 - delz*fs2);
+        tor2 = rinv * (delz*fs1 - delx*fs3);
+        tor3 = rinv * (delx*fs2 - dely*fs1);
+        phia[i][0] -= radi*tor1;
+        phia[i][1] -= radi*tor2;
+        phia[i][2] -= radi*tor3;
+
+        // atom j gets the opposite force and a torque term of the same sign
+        // skipped when newton_pair is off and j is not a local atom
+
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= ccelx;
+          f[j][1] -= ccely;
+          f[j][2] -= ccelz;
+          phia[j][0] -= radj*tor1;
+          phia[j][1] -= radj*tor2;
+          phia[j][2] -= radj*tor3;
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairGranHistory::allocate()
+{
+  // both arrays are (ntypes+1) x (ntypes+1); the loops below index from 1
+
+  const int ntypes = atom->ntypes;
+  allocated = 1;
+
+  setflag = memory->create_2d_int_array(ntypes+1,ntypes+1,"pair:setflag");
+
+  // clear setflag for every type pair with itype <= jtype
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    int *flags = setflag[itype];
+    for (int jtype = itype; jtype <= ntypes; jtype++) flags[jtype] = 0;
+  }
+
+  cutsq = memory->create_2d_double_array(ntypes+1,ntypes+1,"pair:cutsq");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairGranHistory::settings(int narg, char **arg)
+{
+  // requires an existing simulation box and exactly 4 arguments:
+  //   arg[0] = xkk, arg[1] = gamman, arg[2] = xmu, arg[3] = dampflag
+
+  if (!domain->box_exist)
+    error->all("Pair_style granular command before simulation box is defined");
+  if (narg != 4)
+    error->all("Illegal pair_style command");
+
+  xkk = atof(arg[0]);
+  gamman = atof(arg[1]);
+  xmu = atof(arg[2]);
+  dampflag = atoi(arg[3]);
+
+  // granular styles do not use pair_coeff, so set setflag for everything now
+
+  if (!allocated) allocate();
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    int *flags = setflag[itype];
+    for (int jtype = itype; jtype <= ntypes; jtype++) flags[jtype] = 1;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairGranHistory::coeff(int narg, char **arg)
+{
+  // always an error: settings() already sets setflag for every type pair
+  // narg and arg are not examined
+
+  error->all("Granular pair styles do not use pair_coeff settings");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairGranHistory::init_one(int i, int j)
+{
+  if (allocated == 0) allocate();
+
+  // the returned value is a placeholder for neighbor setup only
+  // the actual neighbor calculation does not use it,
+  //   since particles have variable radius
+  // i and j do not affect the result
+
+  const double dummy_cutoff = 1.0;
+  return dummy_cutoff;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairGranHistory::init_style()
+{
+  // derive run constants from the pair_style settings and the timestep,
+  // create and locate the SHEAR_HISTORY fix when a history style is active,
+  // find the freeze group, and set cutforce from the largest particle radii
+
+  int i;
+
+  // tangential stiffness is 2/7 of the normal stiffness
+  // tangential damping is half the normal damping, or zero if dampflag = 0
+  // both damping coefficients are divided by dt; compute() multiplies
+  //   velocities by dt before applying them
+
+  xkkt = xkk * 2.0/7.0;
+  dt = update->dt;
+  double gammas = 0.5*gamman;
+  if (dampflag == 0) gammas = 0.0;
+  gamman_dl = gamman/dt;
+  gammas_dl = gammas/dt;
+
+  // check that atom style is granular
+  // else compute() will update illegal arrays
+
+  if (atom->check_style("granular") == 0)
+    error->all("Must use atom style granular with pair style granular");
+
+  // for pair choices with shear history:
+  //   check if newton flag is valid
+  //   if first init, create Fix needed for storing shear history
+
+  int history = 0;
+  if (force->pair_match("gran/history") || force->pair_match("gran/hertzian"))
+    history = 1;
+
+  if (history && force->newton_pair == 1)
+    error->all("Potential with shear history requires newton pair off");
+
+  if (history && ifix_history == -1) {
+
+    // add_fix() is passed a char **, but string literals are const
+    // cast explicitly: the implicit literal-to-char* conversion was
+    //   removed in C++11
+    // a stack array suffices, the argument list is not kept past the call
+    //   here (the original freed it right after add_fix() returned)
+
+    char *fixarg[3];
+    fixarg[0] = const_cast<char *> ("SHEAR_HISTORY");
+    fixarg[1] = const_cast<char *> ("all");
+    fixarg[2] = const_cast<char *> ("SHEAR_HISTORY");
+    modify->add_fix(3,fixarg);
+  }
+
+  // find associated SHEAR_HISTORY fix that must exist
+  // could have changed locations in fix list since created
+
+  if (history) {
+    for (i = 0; i < modify->nfix; i++)
+      if (strcmp(modify->fix[i]->style,"SHEAR_HISTORY") == 0) ifix_history = i;
+  }
+
+  // check for freeze Fix and set freeze_group_bit
+  // the first fix of style "freeze" is used; 0 means no frozen atoms
+
+  for (i = 0; i < modify->nfix; i++)
+    if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
+  if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
+  else freeze_group_bit = 0;
+
+  // set cutoff by largest particles
+  // maxrad_dynamic = radius of largest dynamic particle, including inserted
+  // maxrad_frozen = radius of largest frozen particle
+  // include frozen-dynamic interactions
+  // do not include frozen-frozen interactions
+  // include future inserted particles as dynamic
+  // cutforce was already set in pair::init(), but this sets it correctly
+
+  double *radius = atom->radius;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  // largest radius of my non-frozen atoms, then max over all procs
+
+  double maxrad_dynamic = 0.0;
+  for (i = 0; i < nlocal; i++)
+    if (!(mask[i] & freeze_group_bit))
+      maxrad_dynamic = MAX(maxrad_dynamic,radius[i]);
+  double mine = maxrad_dynamic;
+  MPI_Allreduce(&mine,&maxrad_dynamic,1,MPI_DOUBLE,MPI_MAX,world);
+
+  // every fix of style "insert" contributes its radius_hi
+
+  for (i = 0; i < modify->nfix; i++)
+    if (strcmp(modify->fix[i]->style,"insert") == 0)
+      maxrad_dynamic =
+        MAX(maxrad_dynamic,((FixInsert *) modify->fix[i])->radius_hi);
+
+  // largest radius of my frozen atoms, then max over all procs
+
+  double maxrad_frozen = 0.0;
+  for (i = 0; i < nlocal; i++)
+    if (mask[i] & freeze_group_bit)
+      maxrad_frozen = MAX(maxrad_frozen,radius[i]);
+  mine = maxrad_frozen;
+  MPI_Allreduce(&mine,&maxrad_frozen,1,MPI_DOUBLE,MPI_MAX,world);
+
+  cutforce = maxrad_dynamic + MAX(maxrad_dynamic,maxrad_frozen);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHistory::write_restart(FILE *fp)
+{
+  // the global settings are the only data this style writes
+
+  this->write_restart_settings(fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHistory::read_restart(FILE *fp)
+{
+  // read the global settings first, then create the type-pair arrays
+
+  this->read_restart_settings(fp);
+  this->allocate();
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairGranHistory::write_restart_settings(FILE *fp)
+{
+  // raw binary, in the order read_restart_settings() reads them back:
+  //   xkk, gamman, xmu (doubles), then dampflag (int)
+
+  fwrite(&xkk,sizeof(xkk),1,fp);
+  fwrite(&gamman,sizeof(gamman),1,fp);
+  fwrite(&xmu,sizeof(xmu),1,fp);
+  fwrite(&dampflag,sizeof(dampflag),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairGranHistory::read_restart_settings(FILE *fp)
+{
+  // only proc 0 touches the file
+  // values come back in the order write_restart_settings() wrote them
+
+  const bool reader = (comm->me == 0);
+  if (reader) {
+    fread(&xkk,sizeof(xkk),1,fp);
+    fread(&gamman,sizeof(gamman),1,fp);
+    fread(&xmu,sizeof(xmu),1,fp);
+    fread(&dampflag,sizeof(dampflag),1,fp);
+  }
+
+  // hand proc 0's values to every other proc
+
+  MPI_Bcast(&xkk,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&gamman,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&xmu,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&dampflag,1,MPI_INT,0,world);
+}
--- a/src/GRANULAR/pair_gran_history.h
+++ b/src/GRANULAR/pair_gran_history.h
@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_GRAN_HISTORY_H
+#define PAIR_GRAN_HISTORY_H
+
+#include "pair.h"
+
+class PairGranHistory : public Pair {
+  friend class Neighbor;
+  friend class FixWallGran;
+  friend class FixGranDiag;
+  friend class FixInsert;
+
+ public:
+  PairGranHistory();
+  ~PairGranHistory();
+
+  // virtual so that derived granular styles can supply their own force loop
+
+  virtual void compute(int eflag, int vflag);
+
+  void settings(int narg, char **arg);
+  void coeff(int narg, char **arg);
+  double init_one(int i, int j);
+  void init_style();
+
+  void write_restart(FILE *fp);
+  void read_restart(FILE *fp);
+  void write_restart_settings(FILE *fp);
+  void read_restart_settings(FILE *fp);
+
+ protected:
+  double xkk;                 // set from pair_style arg 0
+  double xkkt;                // xkk * 2/7, set in init_style()
+  double xmu;                 // set from pair_style arg 2
+  int dampflag;               // 0 = tangential damping off, see init_style()
+  double gamman;              // set from pair_style arg 1
+  double dt;                  // copy of update->dt, set in init_style()
+  double gamman_dl;           // gamman / dt
+  double gammas_dl;           // tangential damping / dt
+  int ifix_history;           // index of SHEAR_HISTORY fix, -1 if none yet
+  int freeze_group_bit;       // groupbit of the freeze fix, 0 if none
+
+  void allocate();
+};
+
+#endif
--- a/src/GRANULAR/pair_gran_no_history.cpp
+++ b/src/GRANULAR/pair_gran_no_history.cpp
@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "string.h"
+#include "pair_gran_no_history.h"
+#include "atom.h"
+#include "force.h"
+#include "neighbor.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranNoHistory::compute(int eflag, int vflag)
+{
+  // granular contact force without shear history
+  // for every neighbor pair whose spheres overlap (rsq < radsum^2):
+  //   normal force = spring term xkk*(radsum-r) minus velocity damping
+  //   tangential force opposes the relative tangential velocity and is
+  //     capped at xmu * |normal force|
+  //   forces are summed into f, torque terms into phia
+  // eflag and vflag are not referenced anywhere in this routine
+
+  int i,j,k,numneigh;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double radi,radj,radsum,rsq,r,rinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3,vrel;
+  double xmeff,damp,ccel,ccelx,ccely,ccelz,tor1,tor2,tor3;
+  double fn,fs,ft,fs1,fs2,fs3;
+  int *neighs;
+
+  double **f = atom->f;
+  double **x = atom->x;
+  double **v = atom->v;
+  double **phiv = atom->phiv;
+  double **phia = atom->phia;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      // spheres are in contact only if center distance < sum of radii
+
+      if (rsq < radsum*radsum) {
+	r = sqrt(rsq);
+
+	// relative translational velocity
+
+	vr1 = v[i][0] - v[j][0];
+	vr2 = v[i][1] - v[j][1];
+	vr3 = v[i][2] - v[j][2];
+
+	// scale by the class member dt (set outside this routine)
+
+	vr1 *= dt;
+	vr2 *= dt;
+	vr3 *= dt;
+
+	// normal component
+	// vnnr = vr . del, so vn = del * (vr . del) / |del|^2
+
+	vnnr = vr1*delx + vr2*dely + vr3*delz;
+	vn1 = delx*vnnr / rsq;
+	vn2 = dely*vnnr / rsq;
+	vn3 = delz*vnnr / rsq;
+
+	// tangential component
+
+	vt1 = vr1 - vn1;
+	vt2 = vr2 - vn2;
+	vt3 = vr3 - vn3;
+
+	// relative rotational velocity
+
+	wr1 = radi*phiv[i][0] + radj*phiv[j][0];
+	wr2 = radi*phiv[i][1] + radj*phiv[j][1];
+	wr3 = radi*phiv[i][2] + radj*phiv[j][2];
+
+	wr1 *= dt/r;
+	wr2 *= dt/r;
+	wr3 *= dt/r;
+
+	// normal damping term
+	// this definition of DAMP includes the extra 1/r term
+	// if either atom is in the freeze group, the effective mass is
+	//   replaced by the mass of the other atom
+
+	xmeff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+	if (mask[i] & freeze_group_bit) xmeff = rmass[j];
+	if (mask[j] & freeze_group_bit) xmeff = rmass[i];
+	damp = xmeff*gamman_dl*vnnr/rsq;
+	ccel = xkk*(radsum-r)/r - damp;
+
+	// relative velocities
+	// tangential velocity minus the cross-product term del x wr
+
+	vtr1 = vt1 - (delz*wr2-dely*wr3);
+	vtr2 = vt2 - (delx*wr3-delz*wr1);
+	vtr3 = vt3 - (dely*wr1-delx*wr2);
+	vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+	vrel = sqrt(vrel);
+
+	// force normalization
+	// fn = friction cap xmu * |normal force|, fs = viscous tangential force
+	// ft = smaller of the two divided by vrel; the vrel test avoids 0/0
+
+	fn = xmu * fabs(ccel*r);
+	fs = xmeff*gammas_dl*vrel;
+	if (vrel != 0.0) ft = MIN(fn,fs) / vrel;
+	else ft = 0.0;
+
+	// shear friction forces
+
+	fs1 = -ft*vtr1;
+	fs2 = -ft*vtr2;
+	fs3 = -ft*vtr3;
+
+	// forces & torques
+
+	ccelx = delx*ccel + fs1;
+	ccely = dely*ccel + fs2;
+	ccelz = delz*ccel + fs3;
+	f[i][0] += ccelx;
+	f[i][1] += ccely;
+	f[i][2] += ccelz;
+
+	// torque direction = (del x fs) / r, scaled by each atom's radius
+
+	rinv = 1/r;
+	tor1 = rinv * (dely*fs3 - delz*fs2);
+	tor2 = rinv * (delz*fs1 - delx*fs3);
+	tor3 = rinv * (delx*fs2 - dely*fs1);
+	phia[i][0] -= radi*tor1;
+	phia[i][1] -= radi*tor2;
+	phia[i][2] -= radi*tor3;
+
+	// reaction on j is applied only if newton_pair is set
+	//   or j is a local atom (j < nlocal)
+
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= ccelx;
+	  f[j][1] -= ccely;
+	  f[j][2] -= ccelz;
+	  phia[j][0] -= radj*tor1;
+	  phia[j][1] -= radj*tor2;
+	  phia[j][2] -= radj*tor3;
+	}
+      }
+    }
+  }
+}
--- a/src/GRANULAR/pair_gran_no_history.h
+++ b/src/GRANULAR/pair_gran_no_history.h
@ -0,0 +1,24 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_GRAN_NO_HISTORY_H
+#define PAIR_GRAN_NO_HISTORY_H
+
+#include "pair_gran_history.h"
+
+class PairGranNoHistory : public PairGranHistory {
+  // everything except the force kernel is inherited from PairGranHistory
+
+ public:
+  void compute(int eflag, int vflag);
+};
+
+#endif
--- a/src/GRANULAR/style_granular.h
+++ b/src/GRANULAR/style_granular.h
@ -0,0 +1,50 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// style table for the granular package
+// each section is compiled only when the including file has defined the
+//   matching macro (AtomInclude, AtomClass, FixInclude, FixClass,
+//   PairInclude, PairClass)
+// the *Include sections pull in class headers
+// the *Class sections list XStyle(name,Class) entries; the XStyle macros
+//   are defined elsewhere - presumably they map a style name to its class
+
+#ifdef AtomInclude
+#include "atom_granular.h"
+#endif
+
+// NOTE: "# endif" below (with a space after #) is valid preprocessor syntax
+
+#ifdef AtomClass
+AtomStyle(granular,AtomGranular)
+# endif
+
+#ifdef FixInclude
+#include "fix_freeze.h"
+#include "fix_gran_diag.h"
+#include "fix_insert.h"
+#include "fix_nve_gran.h"
+#include "fix_shear_history.h"
+#include "fix_wall_gran.h"
+#endif
+
+// NOTE(review): SHEAR_HISTORY is the only upper-case style name in this
+//   table - looks intentional, confirm before normalizing
+
+#ifdef FixClass
+FixStyle(freeze,FixFreeze)
+FixStyle(gran/diag,FixGranDiag)
+FixStyle(insert,FixInsert)
+FixStyle(nve/gran,FixNVEGran)
+FixStyle(SHEAR_HISTORY,FixShearHistory)
+FixStyle(wall/gran,FixWallGran)
+#endif
+
+#ifdef PairInclude
+#include "pair_gran_hertzian.h"
+#include "pair_gran_history.h"
+#include "pair_gran_no_history.h"
+#endif
+
+#ifdef PairClass
+PairStyle(gran/hertzian,PairGranHertzian)
+PairStyle(gran/history,PairGranHistory)
+PairStyle(gran/no_history,PairGranNoHistory)
+#endif
--- a/src/KSPACE/Install.csh
+++ b/src/KSPACE/Install.csh
@ -0,0 +1,62 @@
+# Install/unInstall package classes in LAMMPS
+#
+# takes one argument:
+#   1 = copy this package's source and header files into the parent directory
+#   0 = remove those files from the parent directory
+# any other value matches neither branch, so nothing is done
+
+# pair_lj_charmm_coul_long.h must always be in src
+# (its cp and rm lines below are commented out for that reason)
+
+if ($1 == 1) then
+
+  cp style_kspace.h ..
+
+  # source files
+
+  cp ewald.cpp ..
+  cp pppm.cpp ..
+  cp pppm_tip4p.cpp ..
+  cp pair_buck_coul_long.cpp ..
+  cp pair_lj_cut_coul_long.cpp ..
+  cp pair_lj_cut_coul_long_tip4p.cpp ..
+  cp pair_lj_charmm_coul_long.cpp ..
+  cp fft3d.cpp ..
+  cp fft3d_wrap.cpp ..
+  cp remap.cpp ..
+  cp remap_wrap.cpp ..
+
+  # header files
+
+  cp ewald.h ..
+  cp pppm.h ..
+  cp pppm_tip4p.h ..
+  cp pair_buck_coul_long.h ..
+  cp pair_lj_cut_coul_long.h ..
+  cp pair_lj_cut_coul_long_tip4p.h ..
+#  cp pair_lj_charmm_coul_long.h ..
+  cp fft3d.h ..
+  cp fft3d_wrap.h ..
+  cp remap.h ..
+  cp remap_wrap.h ..
+
+else if ($1 == 0) then
+
+  # rm followed by touch leaves an empty style_kspace.h in the parent dir
+
+  rm ../style_kspace.h
+  touch ../style_kspace.h
+
+  # source files
+
+  rm ../ewald.cpp
+  rm ../pppm.cpp
+  rm ../pppm_tip4p.cpp
+  rm ../pair_buck_coul_long.cpp
+  rm ../pair_lj_cut_coul_long.cpp
+  rm ../pair_lj_cut_coul_long_tip4p.cpp
+  rm ../pair_lj_charmm_coul_long.cpp
+  rm ../fft3d.cpp
+  rm ../fft3d_wrap.cpp
+  rm ../remap.cpp
+  rm ../remap_wrap.cpp
+
+  # header files
+
+  rm ../ewald.h
+  rm ../pppm.h
+  rm ../pppm_tip4p.h
+  rm ../pair_buck_coul_long.h
+  rm ../pair_lj_cut_coul_long.h
+  rm ../pair_lj_cut_coul_long_tip4p.h
+#  rm ../pair_lj_charmm_coul_long.h
+  rm ../fft3d.h
+  rm ../fft3d_wrap.h
+  rm ../remap.h
+  rm ../remap_wrap.h
+
+endif
--- a/src/KSPACE/ewald.cpp
+++ b/src/KSPACE/ewald.cpp
@ -0,0 +1,846 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "math.h"
+#include "ewald.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair_buck_coul_long.h"
+#include "pair_lj_cut_coul_long.h"
+#include "pair_lj_charmm_coul_long.h"
+#include "pair_lj_class2_coul_long.h"
+#include "pair_table.h"
+#include "domain.h"
+#include "memory.h"
+#include "error.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ---------------------------------------------------------------------- */
+
+Ewald::Ewald(int narg, char **arg) : KSpace(narg, arg)
+{
+  // exactly one argument is accepted: the precision, parsed with atof
+
+  if (narg != 1) error->all("Illegal kspace_style ewald command");
+
+  precision = atof(arg[0]);
+  PI = 4.0*atan(1.0);
+
+  // K-vector arrays are allocated later, in setup(), once kmax is known
+  // kmax3d and kmax_created are zeroed here because memory_usage(),
+  //   setup() and the destructor read them before setup() has had a
+  //   chance to assign them
+
+  kmax = 0;
+  kmax3d = 0;
+  kmax_created = 0;
+  kxvecs = kyvecs = kzvecs = NULL;
+  ug = NULL;
+  eg = vg = NULL;
+  sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
+
+  // per-atom arrays are sized from atom->nmax when first created
+
+  nmax = 0;
+  ek = NULL;
+  cs = sn = NULL;
+
+  kcount = 0;
+}
+
+/* ----------------------------------------------------------------------
+   free all memory 
+------------------------------------------------------------------------- */
+
+Ewald::~Ewald()
+{
+  // per-atom arrays
+  // cs and sn are created with a lower index bound of -kmax, recorded in
+  //   kmax_created, so that offset is handed back when freeing them
+
+  memory->destroy_3d_double_array(sn,-kmax_created);
+  memory->destroy_3d_double_array(cs,-kmax_created);
+  memory->destroy_2d_double_array(ek);
+
+  // arrays sized by the number of K-vectors
+
+  deallocate();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Ewald::init()
+{
+  // progress message from proc 0 only
+
+  const int me = comm->me;
+  if (me == 0) {
+    if (screen) fprintf(screen,"Ewald initialization ...\n");
+    if (logfile) fprintf(logfile,"Ewald initialization ...\n");
+  }
+
+  // error checks
+  // 3d only; without slab correction no nonperiodic dimension is allowed;
+  //   with slab correction x,y must be periodic and both z boundary
+  //   flags must equal 1
+
+  if (force->dimension == 2) error->all("Cannot use Ewald with 2d simulation");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all("Cannot use nonperiodic boundaries with Ewald");
+
+  if (slabflag == 1) {
+    const bool xy_periodic =
+      (domain->xperiodic == 1 && domain->yperiodic == 1);
+    const bool z_ok =
+      (domain->boundary[2][0] == 1 && domain->boundary[2][1] == 1);
+    if (!(xy_periodic && z_ok))
+      error->all("Incorrect boundaries with slab Ewald");
+  }
+
+  // require a pair style with long-range Coulombics
+  // take the short-range Coulombic cutoff from whichever style matches
+
+  qqrd2e = force->qqrd2e;
+
+  double cutoff;
+  Pair *match;
+
+  if (force->pair == NULL)
+    error->all("KSpace style is incompatible with Pair style");
+  else if ((match = force->pair_match("buck/coul/long")) != NULL)
+    cutoff = ((PairBuckCoulLong *) match)->cut_coul;
+  else if ((match = force->pair_match("lj/cut/coul/long")) != NULL)
+    cutoff = ((PairLJCutCoulLong *) match)->cut_coul;
+  else if ((match = force->pair_match("lj/charmm/coul/long")) != NULL)
+    cutoff = ((PairLJCharmmCoulLong *) match)->cut_coul;
+  else if ((match = force->pair_match("lj/class2/coul/long")) != NULL)
+    cutoff = ((PairLJClass2CoulLong *) match)->cut_coul;
+  else if ((match = force->pair_match("table")) != NULL)
+    cutoff = ((PairTable *) match)->cut_coul();
+  else
+    error->all("KSpace style is incompatible with Pair style");
+
+  // total charge and total squared charge, summed over all procs
+
+  const double *q = atom->q;
+  const int nlocal = atom->nlocal;
+
+  double qlocal = 0.0;
+  double qsqlocal = 0.0;
+  for (int i = 0; i < nlocal; i++) {
+    qlocal += q[i];
+    qsqlocal += q[i]*q[i];
+  }
+
+  MPI_Allreduce(&qlocal,&qsum,1,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(&qsqlocal,&qsqsum,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space resolution: splitting parameter and squared K cutoff
+
+  const double logprec = log(precision);
+  g_ewald = (1.35 - 0.15*logprec)/cutoff;
+  gsqmx = -4.0*g_ewald*g_ewald*logprec;
+
+  if (me == 0) {
+    if (screen) fprintf(screen,"  G vector = %g\n",g_ewald);
+    if (logfile) fprintf(logfile,"  G vector = %g\n",g_ewald);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   adjust Ewald coeffs, called initially and whenever volume has changed 
+------------------------------------------------------------------------- */
+
+void Ewald::setup()
+{
+  // box lengths
+  // z is stretched by slab_volfactor for 2d slab Ewald
+  // 3d Ewald has slab_volfactor = 1.0, so z is used unchanged
+
+  const double xprd = domain->xprd;
+  const double yprd = domain->yprd;
+  const double zprd_slab = domain->zprd * slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  // reciprocal lattice spacing along each axis
+
+  const double twopi = 2.0*PI;
+  unitk[0] = twopi/xprd;
+  unitk[1] = twopi/yprd;
+  unitk[2] = twopi/zprd_slab;
+
+  // largest integer K component needed along each axis
+  // depends on current box size, precision, and g_ewald
+
+  const double reach = sqrt(-log(precision));
+  const int nkxmx = static_cast<int> ((g_ewald*xprd/PI) * reach);
+  const int nkymx = static_cast<int> ((g_ewald*yprd/PI) * reach);
+  const int nkzmx = static_cast<int> ((g_ewald*zprd_slab/PI) * reach);
+
+  const int kmax_prev = kmax;
+  kmax = nkxmx;
+  if (nkymx > kmax) kmax = nkymx;
+  if (nkzmx > kmax) kmax = nkzmx;
+  kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+
+  // reallocate K-dependent and per-atom arrays only when kmax has grown
+
+  if (kmax > kmax_prev) {
+    deallocate();
+    allocate();
+
+    memory->destroy_2d_double_array(ek);
+    memory->destroy_3d_double_array(cs,-kmax_created);
+    memory->destroy_3d_double_array(sn,-kmax_created);
+
+    nmax = atom->nmax;
+    ek = memory->create_2d_double_array(nmax,3,"ewald:ek");
+    cs = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:cs");
+    sn = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;
+  }
+
+  // recompute the per-K-vector coefficients for the new box
+
+  const int kcount_prev = kcount;
+  coeffs();
+
+  // report new sizes from proc 0 if anything changed
+
+  const bool changed = (kmax != kmax_prev || kcount != kcount_prev);
+  if (changed && comm->me == 0) {
+    if (screen)
+      fprintf(screen,"  vectors: actual 1d max = %d %d %d\n",
+              kcount,kmax,kmax3d);
+    if (logfile)
+      fprintf(logfile,"  vectors: actual 1d max = %d %d %d\n",
+              kcount,kmax,kmax3d);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute the Ewald long-range force, energy, virial 
+------------------------------------------------------------------------- */
+
+void Ewald::compute(int eflag, int vflag)
+{
+  // reset accumulators for this call
+
+  energy = 0.0;
+  if (vflag) {
+    for (int n = 0; n < 6; n++) virial[n] = 0.0;
+  }
+
+  // grow per-atom arrays if the local atom count exceeds their capacity
+  // cs/sn are freed with the index offset they were created with
+
+  const int nlocal = atom->nlocal;
+  if (nlocal > nmax) {
+    memory->destroy_2d_double_array(ek);
+    memory->destroy_3d_double_array(cs,-kmax_created);
+    memory->destroy_3d_double_array(sn,-kmax_created);
+
+    nmax = atom->nmax;
+    ek = memory->create_2d_double_array(nmax,3,"ewald:ek");
+    cs = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:cs");
+    sn = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;
+  }
+
+  // partial structure factors on this proc, then sum over all procs
+
+  eik_dot_r();
+  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space portion of electric field
+  // outer loop over K-vectors, inner loop over local atoms
+
+  double **f = atom->f;
+  double *q = atom->q;
+
+  for (int i = 0; i < nlocal; i++)
+    ek[i][0] = ek[i][1] = ek[i][2] = 0.0;
+
+  for (int kv = 0; kv < kcount; kv++) {
+    // cos/sin rows for this K-vector's x,y,z integer components
+
+    const double *csx = cs[kxvecs[kv]][0];
+    const double *snx = sn[kxvecs[kv]][0];
+    const double *csy = cs[kyvecs[kv]][1];
+    const double *sny = sn[kyvecs[kv]][1];
+    const double *csz = cs[kzvecs[kv]][2];
+    const double *snz = sn[kzvecs[kv]][2];
+
+    const double sfr = sfacrl_all[kv];
+    const double sfi = sfacim_all[kv];
+    const double *egk = eg[kv];
+
+    for (int i = 0; i < nlocal; i++) {
+      // exp(i K.r) built from the per-axis factors by angle addition
+
+      const double cypz = csy[i]*csz[i] - sny[i]*snz[i];
+      const double sypz = sny[i]*csz[i] + csy[i]*snz[i];
+      const double exprl = csx[i]*cypz - snx[i]*sypz;
+      const double expim = snx[i]*cypz + csx[i]*sypz;
+      const double partial = expim*sfr - exprl*sfi;
+
+      ek[i][0] += partial*egk[0];
+      ek[i][1] += partial*egk[1];
+      ek[i][2] += partial*egk[2];
+    }
+  }
+
+  // convert E-field to force
+
+  for (int i = 0; i < nlocal; i++) {
+    const double qscale = qqrd2e*q[i];
+    f[i][0] += qscale*ek[i][0];
+    f[i][1] += qscale*ek[i][1];
+    f[i][2] += qscale*ek[i][2];
+  }
+
+  // energy if requested
+  // 1.772453851 is the literal used for sqrt(pi) in the self-energy term
+
+  if (eflag) {
+    for (int kv = 0; kv < kcount; kv++) {
+      const double sfr = sfacrl_all[kv];
+      const double sfi = sfacim_all[kv];
+      energy += ug[kv] * (sfr*sfr + sfi*sfi);
+    }
+    PI = 4.0*atan(1.0);
+    energy -= g_ewald*qsqsum/1.772453851 +
+      0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qqrd2e;
+  }
+
+  // virial if requested
+
+  if (vflag) {
+    for (int kv = 0; kv < kcount; kv++) {
+      const double sfr = sfacrl_all[kv];
+      const double sfi = sfacim_all[kv];
+      const double uk = ug[kv] * (sfr*sfr + sfi*sfi);
+      for (int n = 0; n < 6; n++) virial[n] += uk*vg[kv][n];
+    }
+    for (int n = 0; n < 6; n++) virial[n] *= qqrd2e;
+  }
+
+  // slab-geometry correction
+
+  if (slabflag) slabcorr(eflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Ewald::eik_dot_r()
+{
+  // fill the per-atom cos/sin tables and the local structure factors
+  // cs[m][ic][i], sn[m][ic][i] = cos, sin of m*unitk[ic]*x[i][ic]
+  //   for m = -kmax..kmax, built from m = 1 by angle-addition recurrence
+  // sfacrl[n], sfacim[n] = sum over local atoms of q[i] * cos, sin of K.x
+  // K-vectors are visited in the same order, and with the same
+  //   sqk <= gsqmx test, as in coeffs(), so index n here lines up with
+  //   kxvecs/kyvecs/kzvecs[n] there
+
+  int i,k,l,m,n,ic;
+  double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
+  double sqk,clpm,slpm;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  n = 0;
+
+  // (k,0,0), (0,l,0), (0,0,m)
+  // first pass handles index 1 on each axis and seeds the tables
+  //   with the index 0 and +/-1 entries
+
+  for (ic = 0; ic < 3; ic++) {
+    sqk = unitk[ic]*unitk[ic];
+    if (sqk <= gsqmx) {
+      cstr1 = 0.0;
+      sstr1 = 0.0;
+      for (i = 0; i < nlocal; i++) {
+	cs[0][ic][i] = 1.0;
+	sn[0][ic][i] = 0.0;
+	cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+	sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+	cs[-1][ic][i] = cs[1][ic][i];
+	sn[-1][ic][i] = -sn[1][ic][i];
+	cstr1 += q[i]*cs[1][ic][i];
+	sstr1 += q[i]*sn[1][ic][i];
+      }
+      sfacrl[n] = cstr1;
+      sfacim[n++] = sstr1;
+    }
+  }
+
+  // higher indices on each axis
+  // entry m is obtained from entries m-1 and 1, negative index by symmetry
+
+  for (m = 2; m <= kmax; m++) {
+    for (ic = 0; ic < 3; ic++) {
+      sqk = m*unitk[ic] * m*unitk[ic];
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	for (i = 0; i < nlocal; i++) {
+	  cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - 
+	    sn[m-1][ic][i]*sn[1][ic][i];
+	  sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + 
+	    cs[m-1][ic][i]*sn[1][ic][i];
+	  cs[-m][ic][i] = cs[m][ic][i];
+	  sn[-m][ic][i] = -sn[m][ic][i];
+	  cstr1 += q[i]*cs[m][ic][i];
+	  sstr1 += q[i]*sn[m][ic][i];
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+      }
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0)
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+	for (i = 0; i < nlocal; i++) {
+	  cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+	  sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+	  cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+	  sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m)
+
+  for (l = 1; l <= kmax; l++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+	for (i = 0; i < nlocal; i++) {
+	  cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+	  sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+	  cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+	  sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+	for (i = 0; i < nlocal; i++) {
+	  cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+	  sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+	  cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+	  sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+  // clpm, slpm = cos, sin of the combined y,z phase for each sign choice
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+	  (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  cstr3 = 0.0;
+	  sstr3 = 0.0;
+	  cstr4 = 0.0;
+	  sstr4 = 0.0;
+	  for (i = 0; i < nlocal; i++) {
+	    clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	    slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	    cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	    slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	    cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	    slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	    cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	    slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	    cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	  }
+	  sfacrl[n] = cstr1;
+	  sfacim[n++] = sstr1;
+	  sfacrl[n] = cstr2;
+	  sfacim[n++] = sstr2;
+	  sfacrl[n] = cstr3;
+	  sfacim[n++] = sstr3;
+	  sfacrl[n] = cstr4;
+	  sfacim[n++] = sstr4;
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute coefficients for each Ewald K-vector 
+------------------------------------------------------------------------- */
+
+void Ewald::coeffs()
+{
+  // enumerate all K-vectors with |K|^2 <= gsqmx and store, for each one:
+  //   kxvecs/kyvecs/kzvecs = integer components
+  //   ug = preu * exp(-|K|^2 / (4 g_ewald^2)) / |K|^2
+  //   eg = 2 * K * ug (one entry per x,y,z component)
+  //   vg = six virial factors, ordered xx, yy, zz, xy, xz, yz
+  // only vectors whose first nonzero component is positive are generated
+  // kcount is reset and ends as the number of vectors stored
+  // the visiting order must stay identical to that of eik_dot_r()
+
+  int k,l,m;
+  double sqk,vterm;
+
+  double unitkx = unitk[0];
+  double unitky = unitk[1];
+  double unitkz = unitk[2];
+  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+  double preu = 4.0*PI/volume;
+
+  kcount = 0;
+
+  // (k,0,0), (0,l,0), (0,0,m)
+
+  for (m = 1; m <= kmax; m++) {
+    sqk = (m*unitkx) * (m*unitkx);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = m;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 2.0*unitkx*m*ug[kcount];
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0 + vterm*(unitkx*m)*(unitkx*m);
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitky) * (m*unitky);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = m;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 2.0*unitky*m*ug[kcount];
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0 + vterm*(unitky*m)*(unitky*m);
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitkz) * (m*unitkz);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = m;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0)
+  // the second vector reuses sqk and vterm from the first;
+  //   only the signs of the l-dependent terms change
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = 0;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = 0.0;
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0;
+	vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = 0.0;
+	kcount++;
+
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = -l;
+	kzvecs[kcount] = 0;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = 0.0;
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0;
+	vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = 0.0;
+	kcount++;;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m)
+
+  for (l = 1; l <= kmax; l++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (unitky*l) * (unitky*l) + (unitkz*m) * (unitkz*m);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = 0;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  0.0;
+	eg[kcount][1] =  2.0*unitky*l*ug[kcount];
+	eg[kcount][2] =  2.0*unitkz*m*ug[kcount];
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0;
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	kcount++;
+
+	kxvecs[kcount] = 0;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = -m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  0.0;
+	eg[kcount][1] =  2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	vg[kcount][0] = 1.0;
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (unitkx*k) * (unitkx*k) + (unitkz*m) * (unitkz*m);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = 0;
+	kzvecs[kcount] = m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] =  0.0;
+	eg[kcount][2] =  2.0*unitkz*m*ug[kcount];
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0;
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	vg[kcount][5] = 0.0;
+	kcount++;
+
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = 0;
+	kzvecs[kcount] = -m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] =  0.0;
+	eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0;
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	vg[kcount][5] = 0.0;
+	kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+  // all four share sqk and vterm; off-diagonal vg entries take the sign
+  //   of the product of the two components involved
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l) + 
+	  (unitkz*m) * (unitkz*m);
+	if (sqk <= gsqmx) {
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = l;
+	  kzvecs[kcount] = m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+	  vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = -l;
+	  kzvecs[kcount] = m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = l;
+	  kzvecs[kcount] = -m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = -l;
+	  kzvecs[kcount] = -m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	  kcount++;;
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors 
+------------------------------------------------------------------------- */
+
+void Ewald::allocate()
+{
+  // every array gets one entry (or one row) per slot, kmax3d slots in all
+
+  const int nslots = kmax3d;
+
+  // integer components of each K-vector
+
+  kxvecs = new int[nslots];
+  kyvecs = new int[nslots];
+  kzvecs = new int[nslots];
+
+  // per-K-vector coefficients: energy, field (3), virial (6)
+
+  ug = new double[nslots];
+  eg = memory->create_2d_double_array(nslots,3,"ewald:eg");
+  vg = memory->create_2d_double_array(nslots,6,"ewald:vg");
+
+  // structure factors: local parts and sums over all procs
+
+  sfacrl = new double[nslots];
+  sfacim = new double[nslots];
+  sfacrl_all = new double[nslots];
+  sfacim_all = new double[nslots];
+}
+
+/* ----------------------------------------------------------------------
+   deallocate memory that depends on # of K-vectors 
+------------------------------------------------------------------------- */
+
+void Ewald::deallocate()
+{
+  // release everything obtained in allocate(), in reverse order
+  // pointers are not reset here; callers either reallocate or destruct
+
+  // structure factors
+
+  delete [] sfacim_all;
+  delete [] sfacrl_all;
+  delete [] sfacim;
+  delete [] sfacrl;
+
+  // per-K-vector coefficients
+
+  memory->destroy_2d_double_array(vg);
+  memory->destroy_2d_double_array(eg);
+  delete [] ug;
+
+  // integer K-vector components
+
+  delete [] kzvecs;
+  delete [] kyvecs;
+  delete [] kxvecs;
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2-D Ewald if 
+   adequate empty space is left between repeating slabs (J. Chem. Phys. 
+   111, 3155).  Slabs defined here to be parallel to the xy plane. 
+------------------------------------------------------------------------- */
+
+void Ewald::slabcorr(int eflag)
+{
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  const int nlocal = atom->nlocal;
+
+  // z component of the dipole moment: this proc's part, then global sum
+
+  double dipole_local = 0.0;
+  for (int i = 0; i < nlocal; i++) dipole_local += q[i]*x[i][2];
+
+  double dipole_all;
+  MPI_Allreduce(&dipole_local,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // energy correction, only if energy was requested
+
+  if (eflag) {
+    const double e_slabcorr = 2.0*PI*dipole_all*dipole_all/volume;
+    energy += qqrd2e*e_slabcorr;
+  }
+
+  // force correction acts along z only
+
+  const double ffact = -4.0*PI*dipole_all/volume;
+  for (int i = 0; i < nlocal; i++) {
+    f[i][2] += qqrd2e*q[i]*ffact;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays 
+------------------------------------------------------------------------- */
+
+int Ewald::memory_usage()
+{
+  int bytes = 0;
+
+  // kxvecs, kyvecs, kzvecs
+
+  bytes += 3 * kmax3d * sizeof(int);
+
+  // ug (1), eg (3), vg (6) values per K-vector
+
+  bytes += (1 + 3 + 6) * kmax3d * sizeof(double);
+
+  // sfacrl, sfacim, sfacrl_all, sfacim_all
+
+  bytes += 4 * kmax3d * sizeof(double);
+
+  // ek
+
+  bytes += nmax*3 * sizeof(double);
+
+  // cs and sn, each with 2*kmax+1 rows
+
+  bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double);
+
+  return bytes;
+}
--- a/src/KSPACE/ewald.h
+++ b/src/KSPACE/ewald.h
@ -0,0 +1,52 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef EWALD_H
+#define EWALD_H
+
+#include "kspace.h"
+
+class Ewald : public KSpace {
+ public:
+  Ewald(int, char **);
+  ~Ewald();
+
+  void init();
+  void setup();
+  void compute(int, int);
+  int memory_usage();          // memory usage of local arrays, in bytes
+
+ private:
+  double PI;
+  double precision;
+  int kcount;
+  int kmax;
+  int kmax3d;
+  int kmax_created;
+  double qqrd2e;
+  double gsqmx;
+  double qsum;
+  double qsqsum;
+  double volume;
+  int nmax;
+
+  double unitk[3];
+
+  // arrays released by deallocate()
+
+  int *kxvecs;
+  int *kyvecs;
+  int *kzvecs;
+  double *ug;
+  double **eg;
+  double **vg;
+
+  double **ek;
+
+  // arrays released by deallocate()
+
+  double *sfacrl;
+  double *sfacim;
+  double *sfacrl_all;
+  double *sfacim_all;
+
+  double ***cs;
+  double ***sn;
+
+  void eik_dot_r();
+  void coeffs();
+  void allocate();
+  void deallocate();           // frees the k-space arrays marked above
+  void slabcorr(int);          // slab-geometry energy/force correction
+};
+
+#endif
+
--- a/src/KSPACE/fft3d.cpp
+++ b/src/KSPACE/fft3d.cpp
@ -0,0 +1,999 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "fft3d.h"
+#include "remap.h"
+
+// the whole expansion is parenthesized so that it stays a single operand
+// when the macro call is embedded in a larger expression, e.g. 2*MAX(a,b)
+// each argument may still be evaluated twice, so avoid side effects
+
+#define MIN(A,B) (((A) < (B)) ? (A) : (B))
+#define MAX(A,B) (((A) > (B)) ? (A) : (B))
+
+/* ----------------------------------------------------------------------
+   Data layout for 3d FFTs:
+
+   data set of Nfast x Nmid x Nslow elements is owned by P procs
+   on input, each proc owns a subsection of the elements
+   on output, each proc will own a (possibly different) subsection
+   my subsection must not overlap with any other proc's subsection,
+     i.e. the union of all procs' input (or output) subsections must
+     exactly tile the global Nfast x Nmid x Nslow data set
+   when called from C, all subsection indices are 
+     C-style from 0 to N-1 where N = Nfast or Nmid or Nslow
+   when called from F77, all subsection indices are 
+     F77-style from 1 to N where N = Nfast or Nmid or Nslow
+   a proc can own 0 elements on input or output
+     by specifying hi index < lo index
+   on both input and output, data is stored contiguously on a processor
+     with a fast-varying, mid-varying, and slow-varying index
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Perform 3d FFT 
+
+   Arguments:
+   in           starting address of input data on this proc
+   out          starting address of where output data for this proc
+                  will be placed (can be same as in)
+   flag         1 for forward FFT, -1 for inverse FFT
+   plan         plan returned by previous call to fft_3d_create_plan
+
+   Steps, in the order coded below:
+   optional pre-remap, 1d FFTs of length1, mid1 remap, 1d FFTs of length2,
+     mid2 remap, 1d FFTs of length3, optional post-remap, optional scaling
+   each remap writes to out when its plan->*_target is 0, else to plan->copy
+   when there is no pre-remap the first set of 1d FFTs is run directly on in
+   the vendor 1d FFT calls are given the same address for input and output
+     wherever they take both
+   flag is handed unchanged to the SGI, SCSL, INTEL and T3E routines
+   for DEC and FFTW, flag == -1 selects 'F' / the plan_*_forward plans
+     and any other value selects 'B' / the plan_*_backward plans
+   scaling multiplies the first plan->normnum elements of out by plan->norm
+     and is only done when flag == 1 and plan->scaled is set
+------------------------------------------------------------------------- */
+
+void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
+{
+  int i,total,length,offset,num;
+  double norm;
+  FFT_DATA *data,*copy;
+
+  // system specific constants handed to the vendor 1d FFT routines
+
+#ifdef FFT_SCSL
+  int isys = 0;
+  FFT_PREC scalef = 1.0;
+#endif
+#ifdef FFT_DEC
+  char c = 'C';
+  char f = 'F';
+  char b = 'B';
+  int one = 1;
+#endif
+#ifdef FFT_T3E
+  int isys = 0;
+  double scalef = 1.0;
+#endif
+
+  // pre-remap to prepare for 1st FFTs if needed
+  // copy = loc for remap result, data = buffer the next FFTs operate on
+
+  if (plan->pre_plan) {
+    if (plan->pre_target == 0) copy = out;
+    else copy = plan->copy;
+    remap_3d((double *) in, (double *) copy, (double *) plan->scratch,
+	     plan->pre_plan);
+    data = copy;
+  }
+  else
+    data = in;
+
+  // 1d FFTs along fast axis: total/length transforms, each of size length
+
+  total = plan->total1;
+  length = plan->length1;
+
+#ifdef FFT_SGI
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,&data[offset],1,plan->coeff1);
+#endif
+#ifdef FFT_SCSL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1,
+	   plan->work1,&isys);
+#endif
+#ifdef FFT_INTEL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&data[offset],&length,&flag,plan->coeff1);
+#endif
+#ifdef FFT_DEC
+  if (flag == -1)
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
+  else
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
+#endif
+#ifdef FFT_T3E
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1,
+	   plan->work1,&isys);
+#endif
+#ifdef FFT_FFTW
+  if (flag == -1)
+    fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
+  else
+    fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
+#endif
+
+  // 1st mid-remap to prepare for 2nd FFTs
+  // copy = loc for remap result 
+
+  if (plan->mid1_target == 0) copy = out;
+  else copy = plan->copy;
+  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
+	   plan->mid1_plan);
+  data = copy;
+
+  // 1d FFTs along mid axis 
+
+  total = plan->total2;
+  length = plan->length2;
+
+#ifdef FFT_SGI
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,&data[offset],1,plan->coeff2);
+#endif
+#ifdef FFT_SCSL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2,
+	   plan->work2,&isys);
+#endif
+#ifdef FFT_INTEL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&data[offset],&length,&flag,plan->coeff2);
+#endif
+#ifdef FFT_DEC
+  if (flag == -1)
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
+  else
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
+#endif
+#ifdef FFT_T3E
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2,
+	   plan->work2,&isys);
+#endif
+#ifdef FFT_FFTW
+  if (flag == -1)
+    fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
+  else
+    fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
+#endif
+
+  // 2nd mid-remap to prepare for 3rd FFTs
+  // copy = loc for remap result 
+
+  if (plan->mid2_target == 0) copy = out;
+  else copy = plan->copy;
+  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
+	   plan->mid2_plan);
+  data = copy;
+
+  // 1d FFTs along slow axis 
+
+  total = plan->total3;
+  length = plan->length3;
+
+#ifdef FFT_SGI
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,&data[offset],1,plan->coeff3);
+#endif
+#ifdef FFT_SCSL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3,
+	   plan->work3,&isys);
+#endif
+#ifdef FFT_INTEL
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&data[offset],&length,&flag,plan->coeff3);
+#endif
+#ifdef FFT_DEC
+  if (flag == -1)
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
+  else
+    for (offset = 0; offset < total; offset += length)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
+#endif
+#ifdef FFT_T3E
+  for (offset = 0; offset < total; offset += length)
+    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3,
+	   plan->work3,&isys);
+#endif
+#ifdef FFT_FFTW
+  if (flag == -1)
+    fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
+  else
+    fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
+#endif
+
+  // post-remap to put data in output format if needed
+  // destination is always out 
+
+  if (plan->post_plan)
+    remap_3d((double *) data, (double *) out, (double *) plan->scratch,
+	     plan->post_plan);
+
+  // scaling if required: only for flag == 1, over plan->normnum elements
+
+#ifndef FFT_T3E
+  if (flag == 1 && plan->scaled) {
+    norm = plan->norm;
+    num = plan->normnum;
+    for (i = 0; i < num; i++) {
+      out[i].re *= norm;
+      out[i].im *= norm;
+    }
+  }
+#endif
+
+#ifdef FFT_T3E
+  if (flag == 1 && plan->scaled) {
+    norm = plan->norm;
+    num = plan->normnum;
+    for (i = 0; i < num; i++) out[i] *= (norm,norm);  // comma expression, evaluates to norm
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   Create plan for performing a 3d FFT 
+
+   Arguments:
+   comm                 MPI communicator for the P procs which own the data
+   nfast,nmid,nslow     size of global 3d matrix
+   in_ilo,in_ihi        input bounds of data I own in fast index
+   in_jlo,in_jhi        input bounds of data I own in mid index
+   in_klo,in_khi        input bounds of data I own in slow index
+   out_ilo,out_ihi      output bounds of data I own in fast index
+   out_jlo,out_jhi      output bounds of data I own in mid index
+   out_klo,out_khi      output bounds of data I own in slow index
+   scaled               0 = no scaling of result, 1 = scaling
+   permute              permutation in storage order of indices on output
+                          0 = no permutation
+                          1 = permute once = mid->fast, slow->mid, fast->slow
+                          2 = permute twice = slow->fast, fast->mid, mid->slow
+   nbuf                 returns size of internal storage buffers used by FFT
+
+   Returns the new plan, or NULL on failure
+   failure = FFT_NONE build, a remap plan could not be created, a memory
+     allocation failed, or (FFT_INTEL) a grid size has a prime factor
+     other than 2,3,5
+   on failure everything the plan already owned is released first
+   must be called by all procs in comm, since it performs MPI_Allreduce
+------------------------------------------------------------------------- */
+
+// release a partially built plan and return NULL
+// the plan must have been zero-filled when allocated, so that members
+//   which were never assigned are still NULL (assumes NULL is all-bits-zero)
+// FFTW plans are not touched: every failure exit precedes their creation
+
+static struct fft_plan_3d *fft_3d_abandon_plan(struct fft_plan_3d *plan)
+{
+  if (plan->pre_plan) remap_3d_destroy_plan(plan->pre_plan);
+  if (plan->mid1_plan) remap_3d_destroy_plan(plan->mid1_plan);
+  if (plan->mid2_plan) remap_3d_destroy_plan(plan->mid2_plan);
+  if (plan->post_plan) remap_3d_destroy_plan(plan->post_plan);
+
+  free(plan->copy);
+  free(plan->scratch);
+
+#if defined(FFT_SGI) || defined(FFT_SCSL) || defined(FFT_INTEL) || defined(FFT_T3E)
+  free(plan->coeff1);
+  free(plan->coeff2);
+  free(plan->coeff3);
+#endif
+#if defined(FFT_SCSL) || defined(FFT_T3E)
+  free(plan->work1);
+  free(plan->work2);
+  free(plan->work3);
+#endif
+
+  free(plan);
+  return NULL;
+}
+
+struct fft_plan_3d *fft_3d_create_plan(
+       MPI_Comm comm, int nfast, int nmid, int nslow,
+       int in_ilo, int in_ihi, int in_jlo, int in_jhi,
+       int in_klo, int in_khi,
+       int out_ilo, int out_ihi, int out_jlo, int out_jhi,
+       int out_klo, int out_khi,
+       int scaled, int permute, int *nbuf)
+{
+  struct fft_plan_3d *plan;
+  int me,nprocs;
+  int i,num,flag,remapflag,fftflag;
+  int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
+  int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
+  int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
+  int out_size,first_size,second_size,third_size,copy_size,scratch_size;
+  int np1,np2,ip1,ip2;
+  int list[50];
+
+  // system specific variables 
+
+#ifdef FFT_SCSL
+  FFT_DATA dummy_d[5];
+  FFT_PREC dummy_p[5];
+  int isign,isys;
+  FFT_PREC scalef;
+#endif
+#ifdef FFT_INTEL
+  FFT_DATA dummy;
+#endif
+#ifdef FFT_T3E
+  FFT_DATA dummy[5];
+  int isign,isys;
+  double scalef;
+#endif
+
+  // query MPI info 
+
+  MPI_Comm_rank(comm,&me);
+  MPI_Comm_size(comm,&nprocs);
+
+  // no FFT library: proc 0 reports it, but every proc must bail out
+  // otherwise the remaining procs would wait forever in the collective
+  //   calls below, which proc 0 would never join
+
+#ifdef FFT_NONE
+  if (me == 0) printf("ERROR: Cannot use FFTs with FFT_NONE set\n");
+  return NULL;
+#endif
+
+  // compute division of procs in 2 dimensions not on-processor 
+
+  bifactor(nprocs,&np1,&np2);
+  ip1 = me % np1;
+  ip2 = me/np1;
+
+  // allocate memory for plan data struct
+  // zero-filled so that a failure exit can tell which members were set
+
+  plan = (struct fft_plan_3d *) calloc(1,sizeof(struct fft_plan_3d));
+  if (plan == NULL) return NULL;
+
+  // remap from initial distribution to layout needed for 1st set of 1d FFTs
+  // not needed if all procs own entire fast axis initially
+  // first indices = distribution after 1st set of FFTs 
+
+  if (in_ilo == 0 && in_ihi == nfast-1)
+    flag = 0;
+  else
+    flag = 1;
+
+  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
+
+  if (remapflag == 0) {
+    first_ilo = in_ilo;
+    first_ihi = in_ihi;
+    first_jlo = in_jlo;
+    first_jhi = in_jhi;
+    first_klo = in_klo;
+    first_khi = in_khi;
+    plan->pre_plan = NULL;
+  }
+  else {
+    first_ilo = 0;
+    first_ihi = nfast - 1;
+    first_jlo = ip1*nmid/np1;
+    first_jhi = (ip1+1)*nmid/np1 - 1;
+    first_klo = ip2*nslow/np2;
+    first_khi = (ip2+1)*nslow/np2 - 1;
+    plan->pre_plan =
+      remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
+                           first_ilo,first_ihi,first_jlo,first_jhi,
+                           first_klo,first_khi,
+                           FFT_PRECISION,0,0,2);
+    if (plan->pre_plan == NULL) return fft_3d_abandon_plan(plan);
+  }
+
+  // 1d FFTs along fast axis 
+
+  plan->length1 = nfast;
+  plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1);
+
+  // remap from 1st to 2nd FFT
+  // choose which axis is split over np1 vs np2 to minimize communication
+  // second indices = distribution after 2nd set of FFTs 
+
+  second_ilo = ip1*nfast/np1;
+  second_ihi = (ip1+1)*nfast/np1 - 1;
+  second_jlo = 0;
+  second_jhi = nmid - 1;
+  second_klo = ip2*nslow/np2;
+  second_khi = (ip2+1)*nslow/np2 - 1;
+  plan->mid1_plan =
+      remap_3d_create_plan(comm,
+                           first_ilo,first_ihi,first_jlo,first_jhi,
+                           first_klo,first_khi,
+                           second_ilo,second_ihi,second_jlo,second_jhi,
+                           second_klo,second_khi,
+                           FFT_PRECISION,1,0,2);
+  if (plan->mid1_plan == NULL) return fft_3d_abandon_plan(plan);
+
+  // 1d FFTs along mid axis 
+
+  plan->length2 = nmid;
+  plan->total2 = (second_ihi-second_ilo+1) * nmid * (second_khi-second_klo+1);
+
+  // remap from 2nd to 3rd FFT
+  // if final distribution is permute=2 with all procs owning entire slow axis
+  //   then this remapping goes directly to final distribution
+  //  third indices = distribution after 3rd set of FFTs 
+
+  if (permute == 2 && out_klo == 0 && out_khi == nslow-1)
+    flag = 0;
+  else
+    flag = 1;
+
+  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
+
+  if (remapflag == 0) {
+    third_ilo = out_ilo;
+    third_ihi = out_ihi;
+    third_jlo = out_jlo;
+    third_jhi = out_jhi;
+    third_klo = out_klo;
+    third_khi = out_khi;
+  }
+  else {
+    third_ilo = ip1*nfast/np1;
+    third_ihi = (ip1+1)*nfast/np1 - 1;
+    third_jlo = ip2*nmid/np2;
+    third_jhi = (ip2+1)*nmid/np2 - 1;
+    third_klo = 0;
+    third_khi = nslow - 1;
+  }
+
+  // index triples are passed in j,k,i order here: the data was already
+  //   permuted once by the mid1 remap
+
+  plan->mid2_plan =
+    remap_3d_create_plan(comm,
+                         second_jlo,second_jhi,second_klo,second_khi,
+                         second_ilo,second_ihi,
+                         third_jlo,third_jhi,third_klo,third_khi,
+                         third_ilo,third_ihi,
+                         FFT_PRECISION,1,0,2);
+  if (plan->mid2_plan == NULL) return fft_3d_abandon_plan(plan);
+
+  // 1d FFTs along slow axis 
+
+  plan->length3 = nslow;
+  plan->total3 = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * nslow;
+
+  // remap from 3rd FFT to final distribution
+  //  not needed if permute = 2 and third indices = out indices on all procs 
+
+  if (permute == 2 &&
+      out_ilo == third_ilo && out_ihi == third_ihi &&
+      out_jlo == third_jlo && out_jhi == third_jhi &&
+      out_klo == third_klo && out_khi == third_khi)
+    flag = 0;
+  else
+    flag = 1;
+
+  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
+
+  if (remapflag == 0)
+    plan->post_plan = NULL;
+  else {
+    plan->post_plan =
+      remap_3d_create_plan(comm,
+                           third_klo,third_khi,third_ilo,third_ihi,
+                           third_jlo,third_jhi,
+                           out_klo,out_khi,out_ilo,out_ihi,
+                           out_jlo,out_jhi,
+                           FFT_PRECISION,(permute+1)%3,0,2);
+    if (plan->post_plan == NULL) return fft_3d_abandon_plan(plan);
+  }
+
+  // configure plan memory pointers and allocate work space
+  // out_size = amount of memory given to FFT by user
+  // first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps
+  // copy_size = amount needed internally for extra copy of data
+  // scratch_size = amount needed internally for remap scratch space
+  // for each remap:
+  //   out space used for result if big enough, else require copy buffer
+  //   accumulate largest required remap scratch space 
+
+  out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1);
+  first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) * 
+    (first_khi-first_klo+1);
+  second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) * 
+    (second_khi-second_klo+1);
+  third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * 
+    (third_khi-third_klo+1);
+
+  copy_size = 0;
+  scratch_size = 0;
+
+  if (plan->pre_plan) {
+    if (first_size <= out_size)
+      plan->pre_target = 0;
+    else {
+      plan->pre_target = 1;
+      copy_size = MAX(copy_size,first_size);
+    }
+    scratch_size = MAX(scratch_size,first_size);
+  }
+
+  if (plan->mid1_plan) {
+    if (second_size <= out_size)
+      plan->mid1_target = 0;
+    else {
+      plan->mid1_target = 1;
+      copy_size = MAX(copy_size,second_size);
+    }
+    scratch_size = MAX(scratch_size,second_size);
+  }
+
+  if (plan->mid2_plan) {
+    if (third_size <= out_size)
+      plan->mid2_target = 0;
+    else {
+      plan->mid2_target = 1;
+      copy_size = MAX(copy_size,third_size);
+    }
+    scratch_size = MAX(scratch_size,third_size);
+  }
+
+  if (plan->post_plan)
+    scratch_size = MAX(scratch_size,out_size);
+
+  *nbuf = copy_size + scratch_size;
+
+  if (copy_size) {
+    plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA));
+    if (plan->copy == NULL) return fft_3d_abandon_plan(plan);
+  }
+  else plan->copy = NULL;
+
+  if (scratch_size) {
+    plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA));
+    if (plan->scratch == NULL) return fft_3d_abandon_plan(plan);
+  }
+  else plan->scratch = NULL;
+
+  // system specific pre-computation of 1d FFT coeffs 
+
+#ifdef FFT_SGI
+
+  plan->coeff1 = (FFT_DATA *) malloc((nfast+15)*sizeof(FFT_DATA));
+  plan->coeff2 = (FFT_DATA *) malloc((nmid+15)*sizeof(FFT_DATA));
+  plan->coeff3 = (FFT_DATA *) malloc((nslow+15)*sizeof(FFT_DATA));
+
+  if (plan->coeff1 == NULL || plan->coeff2 == NULL ||
+      plan->coeff3 == NULL) return fft_3d_abandon_plan(plan);
+
+  FFT_1D_INIT(nfast,plan->coeff1);
+  FFT_1D_INIT(nmid,plan->coeff2);
+  FFT_1D_INIT(nslow,plan->coeff3);
+
+#endif
+
+#ifdef FFT_SCSL
+
+  plan->coeff1 = (FFT_PREC *) malloc((2*nfast+30)*sizeof(FFT_PREC));
+  plan->coeff2 = (FFT_PREC *) malloc((2*nmid+30)*sizeof(FFT_PREC));
+  plan->coeff3 = (FFT_PREC *) malloc((2*nslow+30)*sizeof(FFT_PREC));
+
+  if (plan->coeff1 == NULL || plan->coeff2 == NULL || 
+      plan->coeff3 == NULL) return fft_3d_abandon_plan(plan);
+
+  plan->work1 = (FFT_PREC *) malloc((2*nfast)*sizeof(FFT_PREC));
+  plan->work2 = (FFT_PREC *) malloc((2*nmid)*sizeof(FFT_PREC));
+  plan->work3 = (FFT_PREC *) malloc((2*nslow)*sizeof(FFT_PREC));
+
+  if (plan->work1 == NULL || plan->work2 == NULL || 
+      plan->work3 == NULL) return fft_3d_abandon_plan(plan);
+
+  isign = 0;
+  scalef = 1.0;
+  isys = 0;
+
+  FFT_1D_INIT(isign,nfast,scalef,dummy_d,dummy_d,plan->coeff1,dummy_p,&isys);
+  FFT_1D_INIT(isign,nmid,scalef,dummy_d,dummy_d,plan->coeff2,dummy_p,&isys);
+  FFT_1D_INIT(isign,nslow,scalef,dummy_d,dummy_d,plan->coeff3,dummy_p,&isys);
+
+#endif
+
+#ifdef FFT_INTEL
+
+  // every grid size must factor into 2s, 3s and 5s only
+  // verdict is made global so that all procs return together
+
+  flag = 0;
+
+  num = 0;
+  factor(nfast,&num,list);
+  for (i = 0; i < num; i++)
+    if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1;
+  num = 0;
+  factor(nmid,&num,list);
+  for (i = 0; i < num; i++)
+    if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1;
+  num = 0;
+  factor(nslow,&num,list);
+  for (i = 0; i < num; i++)
+    if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1;
+
+  MPI_Allreduce(&flag,&fftflag,1,MPI_INT,MPI_MAX,comm);
+  if (fftflag) {
+    if (me == 0) printf("ERROR: FFTs are not power of 2,3,5\n");
+    return fft_3d_abandon_plan(plan);
+  }
+
+  plan->coeff1 = (FFT_DATA *) malloc((3*nfast/2+1)*sizeof(FFT_DATA));
+  plan->coeff2 = (FFT_DATA *) malloc((3*nmid/2+1)*sizeof(FFT_DATA));
+  plan->coeff3 = (FFT_DATA *) malloc((3*nslow/2+1)*sizeof(FFT_DATA));
+
+  if (plan->coeff1 == NULL || plan->coeff2 == NULL || 
+      plan->coeff3 == NULL) return fft_3d_abandon_plan(plan);
+
+  flag = 0;
+  FFT_1D_INIT(&dummy,&nfast,&flag,plan->coeff1);
+  FFT_1D_INIT(&dummy,&nmid,&flag,plan->coeff2);
+  FFT_1D_INIT(&dummy,&nslow,&flag,plan->coeff3);
+
+#endif
+
+#ifdef FFT_T3E
+
+  plan->coeff1 = (double *) malloc((12*nfast)*sizeof(double));
+  plan->coeff2 = (double *) malloc((12*nmid)*sizeof(double));
+  plan->coeff3 = (double *) malloc((12*nslow)*sizeof(double));
+
+  if (plan->coeff1 == NULL || plan->coeff2 == NULL || 
+      plan->coeff3 == NULL) return fft_3d_abandon_plan(plan);
+
+  plan->work1 = (double *) malloc((8*nfast)*sizeof(double));
+  plan->work2 = (double *) malloc((8*nmid)*sizeof(double));
+  plan->work3 = (double *) malloc((8*nslow)*sizeof(double));
+
+  if (plan->work1 == NULL || plan->work2 == NULL || 
+      plan->work3 == NULL) return fft_3d_abandon_plan(plan);
+
+  isign = 0;
+  scalef = 1.0;
+  isys = 0;
+
+  FFT_1D_INIT(&isign,&nfast,&scalef,dummy,dummy,plan->coeff1,dummy,&isys);
+  FFT_1D_INIT(&isign,&nmid,&scalef,dummy,dummy,plan->coeff2,dummy,&isys);
+  FFT_1D_INIT(&isign,&nslow,&scalef,dummy,dummy,plan->coeff3,dummy,&isys);
+
+#endif
+
+#ifdef FFT_FFTW
+
+  // plans for equal axis lengths are shared rather than created twice
+
+  plan->plan_fast_forward = 
+    fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+  plan->plan_fast_backward = 
+    fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+
+  if (nmid == nfast) {
+    plan->plan_mid_forward = plan->plan_fast_forward;
+    plan->plan_mid_backward = plan->plan_fast_backward;
+  }
+  else {
+    plan->plan_mid_forward = 
+      fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+    plan->plan_mid_backward = 
+      fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+  }
+
+  if (nslow == nfast) {
+    plan->plan_slow_forward = plan->plan_fast_forward;
+    plan->plan_slow_backward = plan->plan_fast_backward;
+  }
+  else if (nslow == nmid) {
+    plan->plan_slow_forward = plan->plan_mid_forward;
+    plan->plan_slow_backward = plan->plan_mid_backward;
+  }
+  else {
+    plan->plan_slow_forward = 
+      fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+    plan->plan_slow_backward = 
+      fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
+  }
+
+#endif
+
+  // scaling normalization
+  // SGI, SCSL, T3E, FFTW: scale by 1/N only if the caller asked for scaling
+  // INTEL, DEC: sense is inverted, scale by N only if the caller asked for
+  //   no scaling (presumably these libraries normalize internally,
+  //   TODO confirm against vendor docs)
+  // N = nfast*nmid*nslow is formed in double precision so that large grids
+  //   cannot overflow an int
+  // normnum = number of output elements owned by this proc = out_size
+
+#if defined(FFT_SGI) || defined(FFT_SCSL) || defined(FFT_T3E) || defined(FFT_FFTW)
+  if (scaled == 0)
+    plan->scaled = 0;
+  else {
+    plan->scaled = 1;
+    plan->norm = 1.0/((double) nfast * nmid * nslow);
+    plan->normnum = out_size;
+  }
+#endif
+
+#if defined(FFT_INTEL) || defined(FFT_DEC)
+  if (scaled == 0) {
+    plan->scaled = 1;
+    plan->norm = (double) nfast * nmid * nslow;
+    plan->normnum = out_size;
+  }
+  else
+    plan->scaled = 0;
+#endif
+
+  return plan;
+}
+
+/* ----------------------------------------------------------------------
+   Destroy a 3d fft plan 
+   frees the remap plans, the copy and scratch buffers, the backend
+     specific coefficient/work arrays or FFTW plans, and the plan itself
+   a NULL plan, e.g. the result of a failed fft_3d_create_plan, is ignored
+------------------------------------------------------------------------- */
+
+void fft_3d_destroy_plan(struct fft_plan_3d *plan)
+{
+  if (plan == NULL) return;
+
+  if (plan->pre_plan) remap_3d_destroy_plan(plan->pre_plan);
+  if (plan->mid1_plan) remap_3d_destroy_plan(plan->mid1_plan);
+  if (plan->mid2_plan) remap_3d_destroy_plan(plan->mid2_plan);
+  if (plan->post_plan) remap_3d_destroy_plan(plan->post_plan);
+
+  if (plan->copy) free(plan->copy);
+  if (plan->scratch) free(plan->scratch);
+
+#ifdef FFT_SGI
+  free(plan->coeff1);
+  free(plan->coeff2);
+  free(plan->coeff3);
+#endif
+#ifdef FFT_SCSL
+  free(plan->coeff1);
+  free(plan->coeff2);
+  free(plan->coeff3);
+  free(plan->work1);
+  free(plan->work2);
+  free(plan->work3);
+#endif
+#ifdef FFT_INTEL
+  free(plan->coeff1);
+  free(plan->coeff2);
+  free(plan->coeff3);
+#endif
+#ifdef FFT_T3E
+  free(plan->coeff1);
+  free(plan->coeff2);
+  free(plan->coeff3);
+  free(plan->work1);
+  free(plan->work2);
+  free(plan->work3);
+#endif
+#ifdef FFT_FFTW
+  // FFTW plans may be shared between axes of equal length
+  // destroy slow and mid plans only if they are not aliases of another
+  //   plan, so that each distinct plan is destroyed exactly once
+
+  if (plan->plan_slow_forward != plan->plan_mid_forward &&
+      plan->plan_slow_forward != plan->plan_fast_forward) {
+    fftw_destroy_plan(plan->plan_slow_forward);
+    fftw_destroy_plan(plan->plan_slow_backward);
+  }
+  if (plan->plan_mid_forward != plan->plan_fast_forward) {
+    fftw_destroy_plan(plan->plan_mid_forward);
+    fftw_destroy_plan(plan->plan_mid_backward);
+  }
+  fftw_destroy_plan(plan->plan_fast_forward);
+  fftw_destroy_plan(plan->plan_fast_backward);
+#endif
+
+  free(plan);
+}
+
+/* ----------------------------------------------------------------------
+   divide n into small factors, return them in list
+
+   Arguments:
+   n            number to factor
+   num          incremented once per factor stored, caller sets it to 0
+   list         receives the factors starting at list[0], caller must
+                  provide enough room
+
+   trial divisors are 2,3,5,7,11,13, each removed as often as it divides n
+   whatever remains, if not 1, is stored as one final entry
+     (it need not be prime)
+   n < 1 stores nothing
+------------------------------------------------------------------------- */
+
+void factor(int n, int *num, int *list)
+{
+  static const int divisors[] = {2,3,5,7,11,13};
+  const int ndivisors = sizeof(divisors)/sizeof(divisors[0]);
+
+  // reject n < 1 up front: every trial divisor divides 0, so n = 0
+  //   would otherwise never terminate and would run off the end of list
+
+  if (n < 1) return;
+
+  for (int m = 0; m < ndivisors; m++) {
+    while (n % divisors[m] == 0) {
+      *list++ = divisors[m];
+      (*num)++;
+      n /= divisors[m];
+    }
+  }
+
+  // leftover cofactor with no divisor in the table
+
+  if (n != 1) {
+    *list = n;
+    (*num)++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   divide n into 2 factors of as equal size as possible 
+
+   Arguments:
+   n            number to split
+   factor1      returns the smaller factor, the largest divisor of n
+                  that does not exceed sqrt(n)
+   factor2      returns n/factor1
+
+   for n < 1 the result is the trivial split 1 x n, so that the outputs
+     are always set and factor1*factor2 == n always holds
+------------------------------------------------------------------------- */
+
+void bifactor(int n, int *factor1, int *factor2)
+{
+  // trivial split, also what the search below ends with if n is prime
+
+  *factor1 = 1;
+  *factor2 = n;
+
+  // no search for n < 1, which also keeps sqrt away from negative values
+
+  if (n < 1) return;
+
+  // walk down from floor(sqrt(n)), first divisor found is the answer
+
+  for (int trial = static_cast<int> (sqrt((double) n)); trial > 1; trial--) {
+    if (n % trial == 0) {
+      *factor1 = trial;
+      *factor2 = n/trial;
+      return;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   perform just the 1d FFTs needed by a 3d FFT, no data movement
+   used for timing purposes
+
+   Arguments:
+   data         starting address of data on this proc, all set to 0.0,
+                  transformed in place
+   nsize        size of data
+   flag         1 for forward FFT, -1 for inverse FFT
+   plan         plan returned by previous call to fft_3d_create_plan
+
+   no remaps are done, so all three sets of 1d FFTs run over the same
+     buffer, which is why the result is only meaningful for timing
+   for DEC and FFTW, flag == -1 selects 'F' / the plan_*_forward plans
+     and any other value selects 'B' / the plan_*_backward plans
+   scaling is applied only when flag == 1 and plan->scaled is set
+------------------------------------------------------------------------- */
+
+void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
+{
+  int i,offset,num;
+  double norm;
+
+  // system specific constants 
+
+#ifdef FFT_SCSL
+  int isys = 0;
+  FFT_PREC scalef = 1.0;
+#endif
+#ifdef FFT_DEC
+  char c = 'C';
+  char f = 'F';
+  char b = 'B';
+  int one = 1;
+#endif
+#ifdef FFT_T3E
+  int isys = 0;
+  double scalef = 1.0;
+#endif
+
+  // total = size of data needed in each dim
+  // length = length of 1d FFT in each dim
+  // total/length = # of 1d FFTs in each dim
+  // if total > nsize, limit # of 1d FFTs to available size of data
+
+  int total1 = plan->total1;
+  int length1 = plan->length1;
+  int total2 = plan->total2;
+  int length2 = plan->length2;
+  int total3 = plan->total3;
+  int length3 = plan->length3;
+
+  // round down to a whole number of transforms that fit in nsize
+
+  if (total1 > nsize) total1 = (nsize/length1) * length1;
+  if (total2 > nsize) total2 = (nsize/length2) * length2;
+  if (total3 > nsize) total3 = (nsize/length3) * length3;
+
+  // perform 1d FFTs in each of 3 dimensions
+  // data is just an array of 0.0
+
+#ifdef FFT_SGI
+  for (offset = 0; offset < total1; offset += length1)
+    FFT_1D(flag,length1,&data[offset],1,plan->coeff1);
+  for (offset = 0; offset < total2; offset += length2)
+    FFT_1D(flag,length2,&data[offset],1,plan->coeff2);
+  for (offset = 0; offset < total3; offset += length3)
+    FFT_1D(flag,length3,&data[offset],1,plan->coeff3);
+#endif
+#ifdef FFT_SCSL
+  for (offset = 0; offset < total1; offset += length1)
+    FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1,
+           plan->work1,&isys);
+  for (offset = 0; offset < total2; offset += length2)
+    FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2,
+           plan->work2,&isys);
+  for (offset = 0; offset < total3; offset += length3)
+    FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3,
+           plan->work3,&isys);
+#endif
+#ifdef FFT_INTEL
+  for (offset = 0; offset < total1; offset += length1)
+    FFT_1D(&data[offset],&length1,&flag,plan->coeff1);
+  for (offset = 0; offset < total2; offset += length2)
+    FFT_1D(&data[offset],&length2,&flag,plan->coeff2);
+  for (offset = 0; offset < total3; offset += length3)
+    FFT_1D(&data[offset],&length3,&flag,plan->coeff3);
+#endif
+#ifdef FFT_DEC
+  if (flag == -1) {
+    for (offset = 0; offset < total1; offset += length1)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one);
+    for (offset = 0; offset < total2; offset += length2)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one);
+    for (offset = 0; offset < total3; offset += length3)
+      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one);
+  } else {
+    for (offset = 0; offset < total1; offset += length1)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one);
+    for (offset = 0; offset < total2; offset += length2)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one);
+    for (offset = 0; offset < total3; offset += length3)
+      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one);
+  }
+#endif
+#ifdef FFT_T3E
+  for (offset = 0; offset < total1; offset += length1)
+    FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1,
+           plan->work1,&isys);
+  for (offset = 0; offset < total2; offset += length2)
+    FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2,
+           plan->work2,&isys);
+  for (offset = 0; offset < total3; offset += length3)
+    FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3,
+           plan->work3,&isys);
+#endif
+#ifdef FFT_FFTW
+  // unlike fft_3d, a distance of 0 is passed here in place of the FFT length
+  // presumably this makes every transform reuse the leading elements of
+  //   data - TODO confirm against the FFTW 2 fftw() calling sequence
+
+  if (flag == -1) {
+    fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0);
+    fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0);
+    fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0);
+  } else {
+    fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0);
+    fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0);
+    fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0);
+  }
+#endif
+
+  // scaling if required 
+  // limit num to size of data
+
+#ifndef FFT_T3E
+  if (flag == 1 && plan->scaled) {
+    norm = plan->norm;
+    num = MIN(plan->normnum,nsize);
+    for (i = 0; i < num; i++) {
+      data[i].re *= norm;
+      data[i].im *= norm;
+    }
+  }
+#endif
+
+  // T3E data is a native complex type, scaled by the real factor norm
+
+#ifdef FFT_T3E
+  if (flag == 1 && plan->scaled) {
+    norm = plan->norm;
+    num = MIN(plan->normnum,nsize);
+    for (i = 0; i < num; i++) data[i] *= norm;
+  }
+#endif
+}
--- a/src/KSPACE/fft3d.h
+++ b/src/KSPACE/fft3d.h
@ -0,0 +1,242 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// User-settable FFT precision 
+
+// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag) 
+// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag) 
+
+#define FFT_PRECISION 2
+
+// ------------------------------------------------------------------------- 
+
+// Data types for single-precision complex 
+
+#if FFT_PRECISION == 1  // single precision: FFT_DATA holds float re/im
+
+#ifdef FFT_SGI
+#include "fft.h"
+typedef complex FFT_DATA;
+#define FFT_1D cfft1d  // name the 1d FFT calls expand to
+#define FFT_1D_INIT cfft1di  // separate init routine; returns FFT_DATA *
+extern "C" {
+  int cfft1d(int, int, FFT_DATA *, int, FFT_DATA *);
+  FFT_DATA *cfft1di(int, FFT_DATA *);
+}
+
+#endif  // FFT_SGI
+
+#ifdef FFT_SCSL
+#include <scsl_fft.h>
+typedef scsl_complex FFT_DATA;
+typedef float FFT_PREC;  // type of the SCSL coeff/work arrays in the plan
+#define FFT_1D ccfft
+#define FFT_1D_INIT ccfft  // same routine as FFT_1D
+extern "C" {
+  int ccfft(int, int, FFT_PREC, FFT_DATA *, FFT_DATA *,
+                      FFT_PREC *, FFT_PREC *, int *);
+}
+
+#endif  // FFT_SCSL
+
+#ifdef FFT_INTEL
+typedef struct {
+  float re;
+  float im;
+} FFT_DATA;
+#define FFT_1D cfft1d_
+#define FFT_1D_INIT cfft1d_  // same routine as FFT_1D
+extern "C" {
+  void cfft1d_(FFT_DATA *, int *, int *, FFT_DATA *);
+}
+#endif  // FFT_INTEL
+
+#ifdef FFT_DEC
+typedef struct {
+  float re;
+  float im;
+} FFT_DATA;
+#define FFT_1D cfft_  // no FFT_1D_INIT is defined for this library
+extern "C" {
+  void cfft_(char *, char *, char *, FFT_DATA *, FFT_DATA *, int *, int *);
+}
+#endif  // FFT_DEC
+
+#ifdef FFT_T3E
+#include <complex.h>
+typedef complex single FFT_DATA;
+#define FFT_1D GGFFT
+#define FFT_1D_INIT GGFFT  // same routine as FFT_1D
+extern "C" {
+  void GGFFT(int *, int *, double *, FFT_DATA *, FFT_DATA *,
+	     double *, double *, int *);
+}
+#endif  // FFT_T3E
+
+#ifdef FFT_FFTW
+#include "fftw.h"
+typedef FFTW_COMPLEX FFT_DATA;  // no FFT_1D macro: fftw() is called directly
+#endif  // FFT_FFTW
+
+#ifdef FFT_NONE
+typedef struct {
+  float re;
+  float im;
+} FFT_DATA;  // data type only; no 1d FFT routine is declared
+#endif  // FFT_NONE
+
+#endif  // FFT_PRECISION == 1
+
+// ------------------------------------------------------------------------- 
+
+// Data types for double-precision complex 
+
+#if FFT_PRECISION == 2  // double precision: FFT_DATA holds double re/im
+
+#ifdef FFT_SGI
+#include "fft.h"
+typedef zomplex FFT_DATA;
+#define FFT_1D zfft1d  // name the 1d FFT calls expand to
+#define FFT_1D_INIT zfft1di  // separate init routine; returns FFT_DATA *
+extern "C" {
+  int zfft1d(int, int, FFT_DATA *, int, FFT_DATA *);
+  FFT_DATA *zfft1di(int, FFT_DATA *);
+}
+#endif  // FFT_SGI
+
+#ifdef FFT_SCSL
+#include <scsl_fft.h>
+typedef scsl_zomplex FFT_DATA;
+typedef double FFT_PREC;  // type of the SCSL coeff/work arrays in the plan
+#define FFT_1D zzfft
+#define FFT_1D_INIT zzfft  // same routine as FFT_1D
+extern "C" {
+  int zzfft(int, int, FFT_PREC, FFT_DATA *, FFT_DATA *,
+                      FFT_PREC *, FFT_PREC *, int *);
+}
+#endif  // FFT_SCSL
+
+#ifdef FFT_INTEL
+typedef struct {
+  double re;
+  double im;
+} FFT_DATA;
+#define FFT_1D zfft1d_
+#define FFT_1D_INIT zfft1d_  // same routine as FFT_1D
+extern "C" {
+  void zfft1d_(FFT_DATA *, int *, int *, FFT_DATA *);
+}
+#endif  // FFT_INTEL
+
+#ifdef FFT_DEC
+typedef struct {
+  double re;
+  double im;
+} FFT_DATA;
+#define FFT_1D zfft_  // no FFT_1D_INIT is defined for this library
+extern "C" {
+  void zfft_(char *, char *, char *, FFT_DATA *, FFT_DATA *, int *, int *);
+}
+#endif  // FFT_DEC
+
+#ifdef FFT_T3E
+#include <complex.h>
+typedef complex double FFT_DATA;
+#define FFT_1D CCFFT
+#define FFT_1D_INIT CCFFT  // same routine as FFT_1D
+extern "C" {
+  void CCFFT(int *, int *, double *, FFT_DATA *, FFT_DATA *,
+	     double *, double *, int *);
+}
+#endif  // FFT_T3E
+
+#ifdef FFT_FFTW
+#include "fftw.h"
+typedef FFTW_COMPLEX FFT_DATA;  // no FFT_1D macro: fftw() is called directly
+#endif  // FFT_FFTW
+
+#ifdef FFT_NONE
+typedef struct {
+  double re;
+  double im;
+} FFT_DATA;  // data type only; no 1d FFT routine is declared
+#endif  // FFT_NONE
+
+#endif  // FFT_PRECISION == 2
+
+// ------------------------------------------------------------------------- 
+
+// details of how to do a 3d FFT 
+
+struct fft_plan_3d {
+  struct remap_plan_3d *pre_plan;       // remap from input -> 1st FFTs
+  struct remap_plan_3d *mid1_plan;      // remap from 1st -> 2nd FFTs
+  struct remap_plan_3d *mid2_plan;      // remap from 2nd -> 3rd FFTs
+  struct remap_plan_3d *post_plan;      // remap from 3rd FFTs -> output
+  FFT_DATA *copy;                   // memory for remap results (if needed)
+  FFT_DATA *scratch;                // scratch space for remaps
+  int total1,total2,total3;         // # of 1st,2nd,3rd FFTs (times length)
+  int length1,length2,length3;      // length of 1st,2nd,3rd FFTs
+  int pre_target;                   // where to put remap results
+  int mid1_target,mid2_target;      // presumably the same, for mid remaps
+  int scaled;                       // whether to scale FFT results
+  int normnum;                      // # of values to rescale
+  double norm;                      // normalization factor for rescaling
+
+                                    // system specific 1d FFT info
+#ifdef FFT_SGI
+  FFT_DATA *coeff1;                 // one coeff array per FFT stage (1,2,3)
+  FFT_DATA *coeff2;
+  FFT_DATA *coeff3;
+#endif  // FFT_SGI
+#ifdef FFT_SCSL
+  FFT_PREC *coeff1;                 // one coeff array per FFT stage (1,2,3)
+  FFT_PREC *coeff2;
+  FFT_PREC *coeff3;
+  FFT_PREC *work1;                  // one work array per FFT stage (1,2,3)
+  FFT_PREC *work2;
+  FFT_PREC *work3;
+#endif  // FFT_SCSL
+#ifdef FFT_INTEL
+  FFT_DATA *coeff1;                 // one coeff array per FFT stage (1,2,3)
+  FFT_DATA *coeff2;
+  FFT_DATA *coeff3;
+#endif  // FFT_INTEL
+#ifdef FFT_T3E
+  double *coeff1;                   // passed to FFT_1D with length1 FFTs
+  double *coeff2;                   // passed to FFT_1D with length2 FFTs
+  double *coeff3;                   // passed to FFT_1D with length3 FFTs
+  double *work1;                    // work array passed with coeff1
+  double *work2;                    // work array passed with coeff2
+  double *work3;                    // work array passed with coeff3
+#endif  // FFT_T3E
+#ifdef FFT_FFTW
+  fftw_plan plan_fast_forward;      // length1 FFTs, used when flag == -1
+  fftw_plan plan_fast_backward;     // length1 FFTs, used otherwise
+  fftw_plan plan_mid_forward;       // length2 FFTs, used when flag == -1
+  fftw_plan plan_mid_backward;      // length2 FFTs, used otherwise
+  fftw_plan plan_slow_forward;      // length3 FFTs, used when flag == -1
+  fftw_plan plan_slow_backward;     // length3 FFTs, used otherwise
+#endif  // FFT_FFTW
+};
+
+// function prototypes (this header does not include mpi.h for MPI_Comm)
+
+void fft_3d(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);  // in,out,flag,plan
+struct fft_plan_3d *fft_3d_create_plan(MPI_Comm, int, int, int,
+  int, int, int, int, int, int, int, int, int, int, int, int,
+  int, int, int *);  // FFT3d treats a NULL return as failure
+void fft_3d_destroy_plan(struct fft_plan_3d *);
+void factor(int, int *, int *);  // NOTE(review): definition not visible here
+void bifactor(int, int *, int *);  // NOTE(review): definition not visible here
+void fft_1d_only(FFT_DATA *, int, int, struct fft_plan_3d *);  // data,nsize,flag,plan
--- a/src/KSPACE/fft3d_wrap.cpp
+++ b/src/KSPACE/fft3d_wrap.cpp
@ -0,0 +1,53 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "fft3d_wrap.h"
+#include "error.h"
+
+/* ----------------------------------------------------------------------
+   create the 3d FFT plan
+   every argument is handed unchanged to fft_3d_create_plan()
+   a NULL plan is reported through error->one()
+------------------------------------------------------------------------- */
+
+FFT3d::FFT3d(MPI_Comm comm, int nfast, int nmid, int nslow,
+             int in_ilo, int in_ihi, int in_jlo, int in_jhi,
+             int in_klo, int in_khi,
+             int out_ilo, int out_ihi, int out_jlo, int out_jhi,
+             int out_klo, int out_khi,
+             int scaled, int permute, int *nbuf)
+{
+  plan = fft_3d_create_plan(comm, nfast, nmid, nslow,
+                            in_ilo, in_ihi, in_jlo, in_jhi,
+                            in_klo, in_khi,
+                            out_ilo, out_ihi, out_jlo, out_jhi,
+                            out_klo, out_khi,
+                            scaled, permute, nbuf);
+
+  if (!plan) {
+    error->one("Could not create 3d FFT plan");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   hand the plan built by the constructor to fft_3d_destroy_plan()
+------------------------------------------------------------------------- */
+
+FFT3d::~FFT3d()
+{
+  fft_3d_destroy_plan(this->plan);
+}
+
+/* ----------------------------------------------------------------------
+   run fft_3d() with this object's plan
+   in and out are reinterpreted as FFT_DATA arrays, flag is passed through
+------------------------------------------------------------------------- */
+
+void FFT3d::compute(double *in, double *out, int flag)
+{
+  FFT_DATA *src = reinterpret_cast<FFT_DATA *>(in);
+  FFT_DATA *dst = reinterpret_cast<FFT_DATA *>(out);
+
+  fft_3d(src, dst, flag, plan);
+}
+
+/* ----------------------------------------------------------------------
+   run fft_1d_only() with this object's plan
+   in is reinterpreted as an FFT_DATA array, nsize and flag pass through
+------------------------------------------------------------------------- */
+
+void FFT3d::timing1d(double *in, int nsize, int flag)
+{
+  FFT_DATA *data = reinterpret_cast<FFT_DATA *>(in);
+
+  fft_1d_only(data, nsize, flag, plan);
+}
--- a/src/KSPACE/fft3d_wrap.h
+++ b/src/KSPACE/fft3d_wrap.h
@ -0,0 +1,32 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef FFT3D_WRAP_H
+#define FFT3D_WRAP_H
+
+#include "lammps.h"
+#include "fft3d.h"
+
+class FFT3d : public LAMMPS {  // C++ wrapper around a struct fft_plan_3d
+ public:
+  FFT3d(MPI_Comm,int,int,int,int,int,int,int,int,int,
+	int,int,int,int,int,int,int,int,int *);  // builds plan, errors if NULL
+  ~FFT3d();                              // destroys plan
+  void compute(double *, double *, int);  // in, out, flag -> fft_3d()
+  void timing1d(double *, int, int);      // in, nsize, flag -> fft_1d_only()
+
+ private:
+  struct fft_plan_3d *plan;  // created in constructor, freed in destructor
+};
+
+#endif
--- a/src/KSPACE/pair_buck_coul_long.cpp
+++ b/src/KSPACE/pair_buck_coul_long.cpp
@ -0,0 +1,446 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_buck_coul_long.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "kspace.h"
+#include "update.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ----------------------------------------------------------------------
+   release the per-type-pair arrays
+   nothing is freed unless allocate() has set the allocated flag
+------------------------------------------------------------------------- */
+
+PairBuckCoulLong::~PairBuckCoulLong()
+{
+  if (!allocated) return;
+
+  // bookkeeping shared by all type pairs
+
+  memory->destroy_2d_int_array(setflag);
+  memory->destroy_2d_double_array(cutsq);
+
+  // Buckingham coefficients and the values derived from them
+
+  memory->destroy_2d_double_array(cut_lj);
+  memory->destroy_2d_double_array(cut_ljsq);
+  memory->destroy_2d_double_array(a);
+  memory->destroy_2d_double_array(rho);
+  memory->destroy_2d_double_array(c);
+  memory->destroy_2d_double_array(rhoinv);
+  memory->destroy_2d_double_array(buck1);
+  memory->destroy_2d_double_array(buck2);
+  memory->destroy_2d_double_array(offset);
+}
+
+/* ----------------------------------------------------------------------
+   pairwise Buckingham + erfc-damped Coulomb forces for atoms i < nlocal
+   eflag != 0: also accumulate eng_coul and eng_vdwl
+   vflag == 1: accumulate the virial pair by pair
+   vflag == 2: forces go to update->f_pair, then virial_compute() is called
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
+  double rsq,r2inv,r6inv,forcecoul,forcebuck,fforce,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  double factor,phicoul,phibuck,r,rexp;
+  int *neighs;
+  double **f;
+
+  eng_vdwl = eng_coul = 0.0;  // energies are re-accumulated on every call
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  if (vflag == 2) f = update->f_pair;  // separate force array for vflag == 2
+  else f = atom->f;
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      if (j < nall) factor_coul = factor_lj = 1.0;  // no special scaling
+      else {
+	factor_coul = special_coul[j/nall];  // j/nall picks the special factor
+	factor_lj = special_lj[j/nall];
+	j %= nall;  // recover the atom index
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  r = sqrt(rsq);
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial in t
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+          r = sqrt(rsq);
+	  rexp = exp(-r*rhoinv[itype][jtype]);
+	  forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+	} else forcebuck = 0.0;
+
+	fforce = (forcecoul + factor_lj*forcebuck) * r2inv;
+
+	f[i][0] += delx*fforce;
+	f[i][1] += dely*fforce;
+	f[i][2] += delz*fforce;
+	if (newton_pair || j < nlocal) {  // otherwise only atom i is updated
+	  f[j][0] -= delx*fforce;
+	  f[j][1] -= dely*fforce;
+	  f[j][2] -= delz*fforce;
+	}
+
+	if (eflag) {
+	  if (newton_pair || j < nlocal) factor = 1.0;
+	  else factor = 0.5;  // half weight when j's force was not applied
+	  if (rsq < cut_coulsq) {
+	    phicoul = prefactor*erfc;
+	    if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+	    eng_coul += factor*phicoul;
+	  }
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    phibuck = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+	      offset[itype][jtype];
+	    eng_vdwl += factor*factor_lj*phibuck;
+	  }
+	}
+
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] += delx*delx*fforce;
+	    virial[1] += dely*dely*fforce;
+	    virial[2] += delz*delz*fforce;
+	    virial[3] += delx*dely*fforce;
+	    virial[4] += delx*delz*fforce;
+	    virial[5] += dely*delz*fforce;
+	  } else {  // same half weighting as the energy
+	    virial[0] += 0.5*delx*delx*fforce;
+	    virial[1] += 0.5*dely*dely*fforce;
+	    virial[2] += 0.5*delz*delz*fforce;
+	    virial[3] += 0.5*delx*dely*fforce;
+	    virial[4] += 0.5*delx*delz*fforce;
+	    virial[5] += 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   create every per-type-pair array with dimensions (ntypes+1) x (ntypes+1)
+   setflag is cleared for all pairs i <= j, with types counted from 1
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::allocate()
+{
+  allocated = 1;
+
+  const int ntypes = atom->ntypes;
+  const int dim = ntypes + 1;
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      setflag[itype][jtype] = 0;
+    }
+  }
+
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+
+  cut_lj = memory->create_2d_double_array(dim,dim,"pair:cut_lj");
+  cut_ljsq = memory->create_2d_double_array(dim,dim,"pair:cut_ljsq");
+  a = memory->create_2d_double_array(dim,dim,"pair:a");
+  rho = memory->create_2d_double_array(dim,dim,"pair:rho");
+  c = memory->create_2d_double_array(dim,dim,"pair:c");
+  rhoinv = memory->create_2d_double_array(dim,dim,"pair:rhoinv");
+  buck1 = memory->create_2d_double_array(dim,dim,"pair:buck1");
+  buck2 = memory->create_2d_double_array(dim,dim,"pair:buck2");
+  offset = memory->create_2d_double_array(dim,dim,"pair:offset");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   arg[0] = global Buckingham cutoff
+   arg[1] = Coulomb cutoff (optional, defaults to arg[0])
+   any other argument count is reported through error->all()
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::settings(int narg, char **arg)
+{
+  if (narg < 1 || narg > 2) error->all("Illegal pair_style command");
+
+  cut_lj_global = atof(arg[0]);
+  if (narg == 1) cut_coul = cut_lj_global;
+  else cut_coul = atof(arg[1]);
+
+  // reset cutoffs that have been explicitly set
+  // inner loop starts at j = i so like-type (i,i) pairs are reset as well;
+  // coeff() sets setflag for every i <= j, including the diagonal
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+	if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   arg[0],arg[1] = type ranges expanded by force->bounds()
+   arg[2],arg[3],arg[4] = a, rho, c
+   arg[5] = per-pair Buckingham cutoff (optional, else the global one)
+   only pairs with i <= j are stored; zero matching pairs is an error
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::coeff(int narg, char **arg)
+{
+  const bool bad_count = (narg != 5 && narg != 6);
+  if (bad_count) error->all("Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  const double a_one = atof(arg[2]);
+  const double rho_one = atof(arg[3]);
+  const double c_one = atof(arg[4]);
+  const double cut_lj_one = (narg == 6) ? atof(arg[5]) : cut_lj_global;
+
+  int npairs = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    const int jstart = MAX(jlo,i);
+    for (int j = jstart; j <= jhi; j++) {
+      a[i][j] = a_one;
+      rho[i][j] = rho_one;
+      c[i][j] = c_one;
+      cut_lj[i][j] = cut_lj_one;
+      setflag[i][j] = 1;
+      npairs++;
+    }
+  }
+
+  if (npairs == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   fills the derived arrays from a, rho, c and cut_lj, mirrors them to j,i
+   returns the larger of the pair's Buckingham cutoff and cut_coul
+------------------------------------------------------------------------- */
+
+double PairBuckCoulLong::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) error->all("All pair coeffs are not set");
+
+  const double cut_lj_ij = cut_lj[i][j];
+  const double rho_ij = rho[i][j];
+  const double a_ij = a[i][j];
+  const double c_ij = c[i][j];
+
+  // quantities derived once per pair for use in compute() and single()
+
+  cut_ljsq[i][j] = cut_lj_ij * cut_lj_ij;
+  rhoinv[i][j] = 1.0/rho_ij;
+  buck1[i][j] = a_ij/rho_ij;
+  buck2[i][j] = 6.0*c_ij;
+
+  // energy at the Buckingham cutoff, subtracted later if offset_flag is set
+
+  double shift = 0.0;
+  if (offset_flag) {
+    const double rexp = exp(-cut_lj_ij/rho_ij);
+    shift = a_ij*rexp - c_ij/pow(cut_lj_ij,6.0);
+  }
+  offset[i][j] = shift;
+
+  // copy i,j values into the j,i entries
+
+  cut_ljsq[j][i] = cut_ljsq[i][j];
+  a[j][i] = a_ij;
+  c[j][i] = c_ij;
+  rhoinv[j][i] = rhoinv[i][j];
+  buck1[j][i] = buck1[i][j];
+  buck2[j][i] = buck2[i][j];
+  offset[j][i] = offset[i][j];
+
+  return MAX(cut_lj_ij,cut_coul);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   checks for a charged atom style, squares the Coulomb cutoff and
+   copies g_ewald from the KSpace solver
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::init_style()
+{
+  // require an atom style with charge defined
+
+  if (atom->charge_allow == 0) {
+    error->all("Must use charged atom style with this pair style");
+  }
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // ensure use of a KSpace long-range solver, set g_ewald
+  // only the "ewald" and "pppm" styles are accepted
+
+  const bool supported =
+    force->kspace != NULL &&
+    (strcmp(force->kspace_style,"ewald") == 0 ||
+     strcmp(force->kspace_style,"pppm") == 0);
+
+  if (!supported) error->all("Pair style is incompatible with KSpace style");
+  else g_ewald = force->kspace->g_ewald;
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+  layout: global settings, then for every pair i <= j its setflag and,
+  if the flag is set, a, rho, c and cut_lj
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  const int ntypes = atom->ntypes;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
+      if (!setflag[itype][jtype]) continue;
+
+      // coefficients are stored only for pairs that were set
+
+      fwrite(&a[itype][jtype],sizeof(double),1,fp);
+      fwrite(&rho[itype][jtype],sizeof(double),1,fp);
+      fwrite(&c[itype][jtype],sizeof(double),1,fp);
+      fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+  reads back the layout produced by write_restart(); every value is
+  broadcast from proc 0 right after it is read
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+
+  allocate();
+
+  const bool reader = (comm->me == 0);
+  const int ntypes = atom->ntypes;
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
+      MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
+
+      // unset pairs have no coefficients stored in the file
+
+      if (!setflag[itype][jtype]) continue;
+
+      if (reader) {
+        fread(&a[itype][jtype],sizeof(double),1,fp);
+        fread(&rho[itype][jtype],sizeof(double),1,fp);
+        fread(&c[itype][jtype],sizeof(double),1,fp);
+        fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
+      }
+      MPI_Bcast(&a[itype][jtype],1,MPI_DOUBLE,0,world);
+      MPI_Bcast(&rho[itype][jtype],1,MPI_DOUBLE,0,world);
+      MPI_Bcast(&c[itype][jtype],1,MPI_DOUBLE,0,world);
+      MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+  order: cut_lj_global, cut_coul (doubles), offset_flag, mix_flag (ints)
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::write_restart_settings(FILE *fp)
+{
+  const size_t dsize = sizeof(double);
+  const size_t isize = sizeof(int);
+
+  // cutoffs first
+
+  fwrite(&cut_lj_global, dsize, 1, fp);
+  fwrite(&cut_coul, dsize, 1, fp);
+
+  // then the flags
+
+  fwrite(&offset_flag, isize, 1, fp);
+  fwrite(&mix_flag, isize, 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+  same order as write_restart_settings(): cut_lj_global, cut_coul,
+  offset_flag, mix_flag
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::read_restart_settings(FILE *fp)
+{
+  const bool reader = (comm->me == 0);
+
+  if (reader) {
+    fread(&cut_lj_global, sizeof(double), 1, fp);
+    fread(&cut_coul, sizeof(double), 1, fp);
+    fread(&offset_flag, sizeof(int), 1, fp);
+    fread(&mix_flag, sizeof(int), 1, fp);
+  }
+
+  // every proc takes part in the broadcasts, reader or not
+
+  MPI_Bcast(&cut_lj_global, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&cut_coul, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&offset_flag, 1, MPI_INT, 0, world);
+  MPI_Bcast(&mix_flag, 1, MPI_INT, 0, world);
+}
+
+/* ----------------------------------------------------------------------
+   force (and optionally energy) for a single pair i,j at distance^2 rsq
+   one.fforce is always set
+   one.eng_coul and one.eng_vdwl are set only when eflag is non-zero
+------------------------------------------------------------------------- */
+
+void PairBuckCoulLong::single(int i, int j, int itype, int jtype,
+                              double rsq, double factor_coul,
+                              double factor_lj,
+                              int eflag, One &one)
+{
+  const bool in_coul = rsq < cut_coulsq;
+  const bool in_buck = rsq < cut_ljsq[itype][jtype];
+  const double r2inv = 1.0/rsq;
+
+  // damped Coulomb term, same formulas as in compute()
+
+  double forcecoul = 0.0;
+  double prefactor = 0.0;
+  double erfc_val = 0.0;
+  if (in_coul) {
+    const double r = sqrt(rsq);
+    const double grij = g_ewald * r;
+    const double expm2 = exp(-grij*grij);
+    const double t = 1.0 / (1.0 + EWALD_P*grij);
+    erfc_val = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
+    forcecoul = prefactor * (erfc_val + EWALD_F*grij*expm2);
+    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+  }
+
+  // Buckingham term
+
+  double forcebuck = 0.0;
+  double r6inv = 0.0;
+  double rexp = 0.0;
+  if (in_buck) {
+    r6inv = r2inv*r2inv*r2inv;
+    const double r = sqrt(rsq);
+    rexp = exp(-r*rhoinv[itype][jtype]);
+    forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+  }
+
+  one.fforce = (forcecoul + factor_lj*forcebuck) * r2inv;
+
+  if (!eflag) return;
+
+  if (in_coul) {
+    double phicoul = prefactor*erfc_val;
+    if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+    one.eng_coul = phicoul;
+  } else {
+    one.eng_coul = 0.0;
+  }
+
+  if (in_buck) {
+    const double phibuck = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+      offset[itype][jtype];
+    one.eng_vdwl = factor_lj*phibuck;
+  } else {
+    one.eng_vdwl = 0.0;
+  }
+}
--- a/src/KSPACE/pair_buck_coul_long.h
+++ b/src/KSPACE/pair_buck_coul_long.h
@ -0,0 +1,47 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_BUCK_COUL_LONG_H
+#define PAIR_BUCK_COUL_LONG_H
+
+#include "pair.h"
+
+class PairBuckCoulLong : public Pair {  // Buckingham + damped Coulomb pair style
+ public:
+  double cut_coul;                    // Coulomb cutoff, set in settings()
+
+  PairBuckCoulLong() {}
+  ~PairBuckCoulLong();                // frees arrays if allocated
+  void compute(int, int);             // (eflag, vflag): forces, energy, virial
+  void settings(int, char **);        // global cutoffs from pair_style args
+  void coeff(int, char **);           // a, rho, c [cutoff] for type ranges
+  double init_one(int, int);          // derived values for pair i,j; returns cutoff
+  void init_style();                  // charge check, cut_coulsq, g_ewald
+  void write_restart(FILE *);         // settings plus per-pair coefficients
+  void read_restart(FILE *);          // proc 0 reads, values are broadcast
+  void write_restart_settings(FILE *);  // cutoffs, offset_flag, mix_flag
+  void read_restart_settings(FILE *);   // proc 0 reads, values are broadcast
+  void single(int, int, int, int, double, double, double, int, One &);
+
+ private:
+  double cut_lj_global;               // default Buckingham cutoff
+  double **cut_lj,**cut_ljsq;         // per-pair cutoff and its square
+  double cut_coulsq;                  // cut_coul squared, set in init_style()
+  double **a,**rho,**c;               // coefficients given to coeff()
+  double **rhoinv,**buck1,**buck2,**offset;  // derived in init_one()
+  double g_ewald;                     // copied from force->kspace in init_style()
+
+  void allocate();                    // creates all per-pair arrays
+};
+
+#endif
--- a/src/KSPACE/pair_lj_charmm_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmm_coul_long.cpp
--- a/src/KSPACE/pair_lj_cut_coul_long.cpp
+++ b/src/KSPACE/pair_lj_cut_coul_long.cpp
--- a/src/KSPACE/pair_lj_cut_coul_long.h
+++ b/src/KSPACE/pair_lj_cut_coul_long.h
@ -0,0 +1,59 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_LJ_CUT_COUL_LONG_H
+#define PAIR_LJ_CUT_COUL_LONG_H
+
+#include "pair.h"
+
+class PairLJCutCoulLong : public Pair {  // NOTE(review): implementation not shown in this view
+ public:
+  double cut_coul;  // presumably the Coulomb cutoff - confirm in the .cpp
+
+  PairLJCutCoulLong();
+  ~PairLJCutCoulLong();
+  virtual void compute(int, int);  // virtual members can be overridden by subclasses
+  virtual void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  virtual void init_style();
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  virtual void write_restart_settings(FILE *);
+  virtual void read_restart_settings(FILE *);
+  virtual void single(int, int, int, int, double, double, double, int, One &);
+
+  void compute_inner();  // presumably rRESPA level variants - TODO confirm
+  void compute_middle();
+  void compute_outer(int, int);
+
+ protected:  // protected rather than private, so subclasses can reach these
+  double cut_lj_global;
+  double **cut_lj,**cut_ljsq;
+  double cut_coulsq;
+  double **epsilon,**sigma;
+  double **lj1,**lj2,**lj3,**lj4,**offset;
+  double *cut_respa;
+  double g_ewald;
+
+  double tabinnersq;  // table members below: presumably Coulomb lookup tables
+  double *rtable,*drtable,*ftable,*dftable,*ctable,*dctable;
+  double *etable,*detable,*ptable,*dptable,*vtable,*dvtable;
+  int ncoulshiftbits,ncoulmask;
+
+  void allocate();
+  void init_tables();
+  void free_tables();
+};
+
+#endif
--- a/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp
+++ b/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp
@ -0,0 +1,528 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_lj_cut_coul_long_tip4p.h"
+#include "angle.h"
+#include "atom.h"
+#include "bond.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "kspace.h"
+#include "update.h"
+#include "respa.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))   // smaller of a,b; an argument may be evaluated twice
+#define MAX(a,b) ((a) > (b) ? (a) : (b))   // larger of a,b; an argument may be evaluated twice
+
+#define EWALD_F   1.12837917    // 2/sqrt(pi); scales the grij*expm2 term of the Coulomb force in compute()
+#define EWALD_P   0.3275911     // p in t = 1/(1 + p*grij) of the polynomial erfc() approximation
+#define A1        0.254829592   // A1..A5: coefficients of the polynomial in t used for erfc()
+#define A2       -0.284496736   //   (values match Abramowitz & Stegun formula 7.1.26)
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulLongTIP4P::PairLJCutCoulLongTIP4P()   // base-class constructor runs implicitly before this body
+{
+  single_enable = 0;   // flag is declared outside this file; NOTE(review): presumably marks single() as unsupported - confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Compute LJ and real-space Coulombic forces for all neighbor pairs of
+// owned atoms, and optionally energy and virial.
+//   eflag != 0 : accumulate eng_vdwl and eng_coul
+//   vflag == 2 : LJ forces are added to update->f_pair, Coulombic forces to
+//                atom->f (via tf); the Coulombic virial is tallied in tvirial
+//                and added to virial after virial_compute() is called
+//   otherwise  : all forces are added to atom->f
+// LJ uses the atom positions.  Coulomb uses the M site from find_M() for
+// atoms of type typeO and spreads the force over the O and its two H atoms.
+
+void PairLJCutCoulLongTIP4P::compute(int eflag, int vflag)
+{
+  int i,j,k,numneigh,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fraction,table;
+  double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3;
+  double r,r2inv,r6inv,forcecoul,forcelj,cforce,negforce;
+  double factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  double phicoul,philj;
+  int iH1,iH2,jH1,jH2;
+  double xiM[3],xjM[3];
+  double *x1,*x2;
+  double fO[3],fH[3];
+  int *neighs;
+  double **f;
+
+  // rsq is single precision because its bit pattern (via int_rsq)
+  // is masked and shifted to index the Coulomb tables
+
+  float rsq;
+  int *int_rsq = (int *) &rsq;
+
+  eng_vdwl = eng_coul = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = tvirial[i] = 0.0;
+
+  // tf is only assigned when vflag == 2
+  // every branch below that uses tf must therefore test for vflag == 2
+
+  if (vflag == 2) {
+    f = update->f_pair;
+    tf = atom->f;
+  }
+  else f = atom->f;
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = atom->nlocal + atom->nghost;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+
+    // x1 = charge site of atom i: M site for an O atom, else the atom itself
+
+    if (itype == typeO) {
+      find_M(i,iH1,iH2,xiM);
+      x1 = xiM;
+    } else x1 = x[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      // neighbor index >= nall encodes a special-pair weighting in j/nall
+
+      if (j < nall) factor_coul = factor_lj = 1.0;
+      else {
+        factor_coul = special_coul[j/nall];
+        factor_lj = special_lj[j/nall];
+        j %= nall;
+      }
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r6inv = r2inv*r2inv*r2inv;
+          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj *= factor_lj * r2inv;
+
+          f[i][0] += delx*forcelj;
+          f[i][1] += dely*forcelj;
+          f[i][2] += delz*forcelj;
+          f[j][0] -= delx*forcelj;
+          f[j][1] -= dely*forcelj;
+          f[j][2] -= delz*forcelj;
+
+          if (eflag) {
+            philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+              offset[itype][jtype];
+            eng_vdwl += factor_lj*philj;
+          }
+        }
+
+        // adjust rsq for off-site O charge(s)
+
+        if (itype == typeO || jtype == typeO) {
+          if (jtype == typeO) {
+            find_M(j,jH1,jH2,xjM);
+            x2 = xjM;
+          } else x2 = x[j];
+          delx = x1[0] - x2[0];
+          dely = x1[1] - x2[1];
+          delz = x1[2] - x2[2];
+          rsq = delx*delx + dely*dely + delz*delz;
+        }
+
+        // test current rsq against cutoff and compute Coulombic force
+
+        if (rsq < cut_coulsq) {
+          if (!ncoultablebits || rsq <= tabinnersq) {
+            r = sqrtf(rsq);
+            r2inv = 1 / rsq;
+            grij = g_ewald * r;
+            expm2 = exp(-grij*grij);
+            t = 1.0 / (1.0 + EWALD_P*grij);
+            erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+            prefactor = qqrd2e * qtmp*q[j]/r;
+            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+            if (factor_coul < 1.0) {
+              forcecoul -= (1.0-factor_coul)*prefactor;
+            }
+          } else {
+            r2inv = 1 / rsq;
+            itable = *int_rsq & ncoulmask;
+            itable >>= ncoulshiftbits;
+            fraction = (rsq - rtable[itable]) * drtable[itable];
+            table = ftable[itable] + fraction*dftable[itable];
+            forcecoul = qtmp*q[j] * table;
+            if (factor_coul < 1.0) {
+              table = ctable[itable] + fraction*dctable[itable];
+              prefactor = qtmp*q[j] * table;
+              forcecoul -= (1.0-factor_coul)*prefactor;
+            }
+          }
+
+          cforce = forcecoul * r2inv;
+
+          // if i,j are not O atoms, force is applied directly
+          // if i or j are O atoms, force is on fictitious atoms
+          // spread force to all 3 atoms in water molecule
+          // formulas due to Feenstra et al, J Comp Chem, 20, 786 (1999)
+
+          if (itype != typeO) {
+            if (vflag != 2) {
+              f[i][0] += delx * cforce;
+              f[i][1] += dely * cforce;
+              f[i][2] += delz * cforce;
+            } else {
+              tf[i][0] += delx * cforce;
+              tf[i][1] += dely * cforce;
+              tf[i][2] += delz * cforce;
+
+              tvirial[0] += 0.5 * delx * delx * cforce;
+              tvirial[1] += 0.5 * dely * dely * cforce;
+              tvirial[2] += 0.5 * delz * delz * cforce;
+              tvirial[3] += 0.5 * dely * delx * cforce;
+              tvirial[4] += 0.5 * delz * delx * cforce;
+              tvirial[5] += 0.5 * delz * dely * cforce;
+            }
+
+          } else {
+            fO[0] = delx*cforce*(1.0-2.0*alpha);
+            fO[1] = dely*cforce*(1.0-2.0*alpha);
+            fO[2] = delz*cforce*(1.0-2.0*alpha);
+
+            fH[0] = alpha * (delx*cforce);
+            fH[1] = alpha * (dely*cforce);
+            fH[2] = alpha * (delz*cforce);
+
+            if (vflag != 2) {
+              f[i][0] += fO[0];
+              f[i][1] += fO[1];
+              f[i][2] += fO[2];
+
+              f[iH1][0] += fH[0];
+              f[iH1][1] += fH[1];
+              f[iH1][2] += fH[2];
+
+              f[iH2][0] += fH[0];
+              f[iH2][1] += fH[1];
+              f[iH2][2] += fH[2];
+
+            } else {
+              tf[i][0] += fO[0];
+              tf[i][1] += fO[1];
+              tf[i][2] += fO[2];
+
+              tf[iH1][0] += fH[0];
+              tf[iH1][1] += fH[1];
+              tf[iH1][2] += fH[2];
+
+              tf[iH2][0] += fH[0];
+              tf[iH2][1] += fH[1];
+              tf[iH2][2] += fH[2];
+
+              // virial uses positions of O,H1,H2 relative to charge site of j
+
+              delx1 = x[i][0] - x2[0];
+              dely1 = x[i][1] - x2[1];
+              delz1 = x[i][2] - x2[2];
+              domain->minimum_image(&delx1,&dely1,&delz1);
+
+              delx2 = x[iH1][0] - x2[0];
+              dely2 = x[iH1][1] - x2[1];
+              delz2 = x[iH1][2] - x2[2];
+              domain->minimum_image(&delx2,&dely2,&delz2);
+
+              delx3 = x[iH2][0] - x2[0];
+              dely3 = x[iH2][1] - x2[1];
+              delz3 = x[iH2][2] - x2[2];
+              domain->minimum_image(&delx3,&dely3,&delz3);
+
+              tvirial[0] += 0.5 * (delx1 * fO[0] + (delx2 + delx3) * fH[0]);
+              tvirial[1] += 0.5 * (dely1 * fO[1] + (dely2 + dely3) * fH[1]);
+              tvirial[2] += 0.5 * (delz1 * fO[2] + (delz2 + delz3) * fH[2]);
+              tvirial[3] += 0.5 * (dely1 * fO[0] + (dely2 + dely3) * fH[0]);
+              tvirial[4] += 0.5 * (delz1 * fO[0] + (delz2 + delz3) * fH[0]);
+              tvirial[5] += 0.5 * (delz1 * fO[1] + (delz2 + delz3) * fH[1]);
+            }
+          }
+
+          if (jtype != typeO) {
+            if (vflag != 2) {
+              f[j][0] -= delx * cforce;
+              f[j][1] -= dely * cforce;
+              f[j][2] -= delz * cforce;
+            } else {
+              tf[j][0] -= delx * cforce;
+              tf[j][1] -= dely * cforce;
+              tf[j][2] -= delz * cforce;
+
+              tvirial[0] += 0.5 * (delx * delx * cforce);
+              tvirial[1] += 0.5 * (dely * dely * cforce);
+              tvirial[2] += 0.5 * (delz * delz * cforce);
+              tvirial[3] += 0.5 * (dely * delx * cforce);
+              tvirial[4] += 0.5 * (delz * delx * cforce);
+              tvirial[5] += 0.5 * (delz * dely * cforce);
+            }
+
+          } else {
+            negforce = -cforce;
+
+            fO[0] = delx*negforce*(1.0-2.0*alpha);
+            fO[1] = dely*negforce*(1.0-2.0*alpha);
+            fO[2] = delz*negforce*(1.0-2.0*alpha);
+
+            fH[0] = alpha * (delx*negforce);
+            fH[1] = alpha * (dely*negforce);
+            fH[2] = alpha * (delz*negforce);
+
+            if (vflag != 2) {
+              f[j][0] += fO[0];
+              f[j][1] += fO[1];
+              f[j][2] += fO[2];
+
+              f[jH1][0] += fH[0];
+              f[jH1][1] += fH[1];
+              f[jH1][2] += fH[2];
+
+              f[jH2][0] += fH[0];
+              f[jH2][1] += fH[1];
+              f[jH2][2] += fH[2];
+
+            } else {
+              tf[j][0] += fO[0];
+              tf[j][1] += fO[1];
+              tf[j][2] += fO[2];
+
+              tf[jH1][0] += fH[0];
+              tf[jH1][1] += fH[1];
+              tf[jH1][2] += fH[2];
+
+              tf[jH2][0] += fH[0];
+              tf[jH2][1] += fH[1];
+              tf[jH2][2] += fH[2];
+
+              // virial uses positions of O,H1,H2 relative to charge site of i
+
+              delx1 = x[j][0] - x1[0];
+              dely1 = x[j][1] - x1[1];
+              delz1 = x[j][2] - x1[2];
+              domain->minimum_image(&delx1,&dely1,&delz1);
+
+              delx2 = x[jH1][0] - x1[0];
+              dely2 = x[jH1][1] - x1[1];
+              delz2 = x[jH1][2] - x1[2];
+              domain->minimum_image(&delx2,&dely2,&delz2);
+
+              delx3 = x[jH2][0] - x1[0];
+              dely3 = x[jH2][1] - x1[1];
+              delz3 = x[jH2][2] - x1[2];
+              domain->minimum_image(&delx3,&dely3,&delz3);
+
+              tvirial[0] += 0.5 * (delx1 * fO[0] + (delx2 + delx3) * fH[0]);
+              tvirial[1] += 0.5 * (dely1 * fO[1] + (dely2 + dely3) * fH[1]);
+              tvirial[2] += 0.5 * (delz1 * fO[2] + (delz2 + delz3) * fH[2]);
+              tvirial[3] += 0.5 * (dely1 * fO[0] + (dely2 + dely3) * fH[0]);
+              tvirial[4] += 0.5 * (delz1 * fO[0] + (delz2 + delz3) * fH[0]);
+              tvirial[5] += 0.5 * (delz1 * fO[1] + (delz2 + delz3) * fH[1]);
+            }
+          }
+
+          // prefactor is always set when factor_coul < 1.0:
+          // by the analytic branch, or by the special-pair part of the table branch
+
+          if (eflag) {
+            if (!ncoultablebits || rsq <= tabinnersq)
+              phicoul = prefactor*erfc;
+            else {
+              table = etable[itable] + fraction*detable[itable];
+              phicoul = qtmp*q[j] * table;
+            }
+            if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+            eng_coul += phicoul;
+          }
+        }
+      }
+    }
+  }
+
+  // add the separately tallied Coulombic virial to the result of virial_compute()
+
+  if (vflag == 2) {
+    virial_compute();
+    for (i = 0; i < 6; i++) virial[i] += tvirial[i];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+// Parse the pair_style arguments:
+//   arg[0],arg[1] = atom types of the TIP4P O and H atoms
+//   arg[2],arg[3] = bond type and angle type of the TIP4P water
+//   arg[4]        = distance from the O atom to the charge site
+//   arg[5]        = global LJ cutoff
+//   arg[6]        = Coulombic cutoff (optional, defaults to the LJ cutoff)
+
+void PairLJCutCoulLongTIP4P::settings(int narg, char **arg)
+{
+  if (narg < 6 || narg > 7) error->all("Illegal pair_style command");
+
+  typeO = atoi(arg[0]);
+  typeH = atoi(arg[1]);
+  typeB = atoi(arg[2]);
+  typeA = atoi(arg[3]);
+  qdist = atof(arg[4]);
+
+  cut_lj_global = atof(arg[5]);
+  if (narg == 6) cut_coul = cut_lj_global;
+  else cut_coul = atof(arg[6]);
+
+  // reset cutoffs that have been explicitly set
+  // inner loop starts at j = i so that like-type (i,i) pairs are reset too
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+// Check the prerequisites of this pair style, derive cut_coulsq, pick up the
+// rRESPA cutoffs and g_ewald, build the Coulomb tables if enabled, and
+// compute the TIP4P geometric factor
+//   alpha = qdist / (2 * cos(theta/2) * blen)
+// from the equilibrium angle (type typeA) and bond length (type typeB).
+
+void PairLJCutCoulLongTIP4P::init_style()
+{
+  int i,j;
+
+  if (atom->tag_enable == 0)
+    error->all("Pair style lj/cut/coul/long/tip4p requires atom IDs");
+  if (!force->newton_pair)
+    error->all("Pair style lj/cut/coul/long/tip4p requires newton pair on");
+  if (atom->charge_allow == 0)
+    error->all("Must use charged atom style with this pair style");
+
+  // bond and angle styles are dereferenced below to compute alpha
+
+  if (force->bond == NULL)
+    error->all("Must use a bond style with TIP4P potential");
+  if (force->angle == NULL)
+    error->all("Must use an angle style with TIP4P potential");
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // set & error check interior rRESPA cutoffs
+  // cut_respa stays NULL unless rRESPA is used with an inner level,
+  // so no stale pointer survives from an earlier run
+
+  cut_respa = NULL;
+  if (strcmp(update->integrate_style,"respa") == 0 &&
+      ((Respa *) update->integrate)->level_inner >= 0) {
+    cut_respa = ((Respa *) update->integrate)->cutoff;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
+          error->all("Pair cutoff < Respa interior cutoff");
+  }
+
+  // ensure use of correct KSpace long-range solver, set g_ewald
+
+  if (force->kspace == NULL)
+    error->all("Pair style is incompatible with KSpace style");
+  if (strcmp(force->kspace_style,"pppm/tip4p") == 0)
+    g_ewald = force->kspace->g_ewald;
+  else error->all("Pair style is incompatible with KSpace style");
+
+  // setup force tables
+
+  if (ncoultablebits) init_tables();
+
+  // set alpha parameter
+
+  double theta = force->angle->equilibrium_angle(typeA);
+  double blen = force->bond->equilibrium_distance(typeB);
+  alpha = qdist / (2.0 * cos(0.5*theta) * blen);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulLongTIP4P::write_restart_settings(FILE *fp)   // writes the global settings to fp as raw binary values
+{
+  fwrite(&typeO,sizeof(int),1,fp);             // TIP4P settings first: O,H atom types, bond and angle types, O-to-charge distance
+  fwrite(&typeH,sizeof(int),1,fp);
+  fwrite(&typeB,sizeof(int),1,fp);
+  fwrite(&typeA,sizeof(int),1,fp);
+  fwrite(&qdist,sizeof(double),1,fp);
+
+  fwrite(&cut_lj_global,sizeof(double),1,fp);  // then cutoffs and flags; read_restart_settings() reads the fields in this same order
+  fwrite(&cut_coul,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+  proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulLongTIP4P::read_restart_settings(FILE *fp)   // reads the settings written by write_restart_settings() and shares them with all procs
+{
+  if (comm->me == 0) {                         // only proc 0 touches the file
+    fread(&typeO,sizeof(int),1,fp);            // field order mirrors write_restart_settings(); fread return values are not checked
+    fread(&typeH,sizeof(int),1,fp);
+    fread(&typeB,sizeof(int),1,fp);
+    fread(&typeA,sizeof(int),1,fp);
+    fread(&qdist,sizeof(double),1,fp);
+
+    fread(&cut_lj_global,sizeof(double),1,fp);
+    fread(&cut_coul,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+
+  MPI_Bcast(&typeO,1,MPI_INT,0,world);         // every proc takes part in each broadcast from proc 0
+  MPI_Bcast(&typeH,1,MPI_INT,0,world);
+  MPI_Bcast(&typeB,1,MPI_INT,0,world);
+  MPI_Bcast(&typeA,1,MPI_INT,0,world);
+  MPI_Bcast(&qdist,1,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+
+}
+
+/* ----------------------------------------------------------------------
+  find 2 H atoms bonded to O atom i
+  compute position xM of fictitious charge site for O atom
+  also return local indices iH1,iH2 of H atoms
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulLongTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
+{
+  // the two H atoms are looked up by the two tags that follow the O atom's tag
+
+  iH1 = atom->map(atom->tag[i] + 1);
+  iH2 = atom->map(atom->tag[i] + 2);
+
+  const bool missingH = (iH1 == -1) || (iH2 == -1);
+  if (missingH) error->one("TIP4P hydrogen is missing");
+
+  const bool bothAreH =
+    (atom->type[iH1] == typeH) && (atom->type[iH2] == typeH);
+  if (!bothAreH) error->one("TIP4P hydrogen has incorrect atom type");
+
+  double **x = atom->x;
+  double *xO = x[i];
+  int d;
+
+  // O->H1 separation, passed through domain->minimum_image()
+
+  double dH1[3];
+  for (d = 0; d < 3; d++) dH1[d] = x[iH1][d] - xO[d];
+  domain->minimum_image(&dH1[0],&dH1[1],&dH1[2]);
+
+  // O->H2 separation, passed through domain->minimum_image()
+
+  double dH2[3];
+  for (d = 0; d < 3; d++) dH2[d] = x[iH2][d] - xO[d];
+  domain->minimum_image(&dH2[0],&dH2[1],&dH2[2]);
+
+  // charge site = O position shifted by alpha times the sum of both O->H vectors
+
+  for (d = 0; d < 3; d++) xM[d] = xO[d] + alpha * (dH1[d] + dH2[d]);
+}
--- a/src/KSPACE/pair_lj_cut_coul_long_tip4p.h
+++ b/src/KSPACE/pair_lj_cut_coul_long_tip4p.h
@ -0,0 +1,41 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PAIR_LJ_CUT_COUL_LONG_TIP4P_H
+#define PAIR_LJ_CUT_COUL_LONG_TIP4P_H
+
+#include "pair_lj_cut_coul_long.h"
+
+class PairLJCutCoulLongTIP4P : public PairLJCutCoulLong {   // LJ + long-range Coulomb pair style with the TIP4P off-site O charge
+  friend class PPPM;           // lets PPPM reach the private members below; NOTE(review): presumably to copy the TIP4P settings - confirm in pppm.cpp
+  
+ public:
+  PairLJCutCoulLongTIP4P();
+  void compute(int, int);      // (eflag, vflag)
+  void settings(int, char **); // (narg, arg) of the pair_style command
+  void init_style();
+  void write_restart_settings(FILE *fp);
+  void read_restart_settings(FILE *fp);
+
+ private:
+  int typeH,typeO;             // atom types of TIP4P water H and O atoms
+  int typeA,typeB;             // angle and bond types of TIP4P water
+  double qdist;                // distance from O site to negative charge
+  double alpha;                // geometric constraint parameter for TIP4P
+  double **tf;                 // set to atom->f in compute() when vflag == 2; receives the Coulombic forces
+  double tvirial[6];           // Coulombic virial tallied in compute(), added to virial when vflag == 2
+
+  void find_M(int, int &, int &, double *);   // (O atom index, out: H1 index, out: H2 index, out: charge site position)
+};
+
+#endif
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
--- a/src/KSPACE/pppm.h
+++ b/src/KSPACE/pppm.h
@ -0,0 +1,94 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PPPM_H
+#define PPPM_H
+
+#include "kspace.h"
+
+class FFT3d;
+class Remap;
+
+class PPPM : public KSpace {   // PPPM long-range (KSpace) solver; base class of PPPMTIP4P
+ public:
+  PPPM(int, char **);
+  ~PPPM();
+  void init();
+  void setup();
+  void compute(int, int);
+  void timing(int, double &, double &);
+  int memory_usage();
+
+ protected:
+  int me,nprocs;
+  double PI;
+  double precision;
+  int nfactors;
+  int *factors;
+  double qsum,qsqsum;
+  double qqrd2e;               // multiplies q*E when PPPMTIP4P::fieldforce() converts the field to a force
+  double cutoff;
+  double volume;
+  double delxinv,delyinv,delzinv,delvolinv;   // scale positions to grid units; delvolinv scales charge deposited in make_rho()
+  double shift,shiftone;       // offsets used when mapping a position to its grid point and stencil distance
+
+  int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
+  int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;   // bounds a particle's full stencil must fit within (checked in particle_map)
+  int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
+  int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
+  int nlower,nupper;           // stencil index range relative to a particle's grid point
+  int ngrid,nfft,nbuf,nfft_both;   // ngrid = number of density_brick values cleared in make_rho()
+
+  double ***density_brick;     // charge density on my brick, indexed [z][y][x]
+  double ***vdx_brick,***vdy_brick,***vdz_brick;   // grid fields interpolated (with negative sign) into ek in fieldforce()
+  double *greensfn;
+  double **vg;
+  double *fkx,*fky,*fkz;
+  double *density_fft;
+  double *work1,*work2;
+  double *buf1,*buf2;
+
+  double *gf_b;
+  double **rho1d,**rho_coeff;  // rho1d[dim][stencil index] = 1d weights filled by compute_rho1d()
+
+  FFT3d *fft1,*fft2;
+  Remap *remap;
+
+  int **part2grid;             // storage for particle -> grid mapping
+  int nmax;
+
+                               // TIP4P settings
+  int typeH,typeO;             // atom types of TIP4P water H and O atoms
+  double qdist;                // distance from O site to negative charge
+  double alpha;                // geometric factor
+
+  void set_grid();
+  void allocate();
+  void deallocate();
+  int factorable(int);
+  double rms(double, double, double, double, double **);
+  void compute_gf_denom();
+  double gf_denom(double, double, double);
+  virtual void particle_map();   // overridden by PPPMTIP4P
+  virtual void make_rho();       // overridden by PPPMTIP4P
+  void brick2fft();
+  void fillbrick();
+  void poisson(int, int);
+  virtual void fieldforce();     // overridden by PPPMTIP4P
+  void procs2grid2d(int,int,int,int *, int*);
+  void compute_rho1d(double, double, double);   // called with the (dx,dy,dz) stencil distances of one particle
+  void compute_rho_coeff();
+  void slabcorr(int);
+};
+
+#endif
--- a/src/KSPACE/pppm_tip4p.cpp
+++ b/src/KSPACE/pppm_tip4p.cpp
@ -0,0 +1,261 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pppm_tip4p.h"
+#include "atom.h"
+#include "domain.h"
+#include "memory.h"
+#include "error.h"
+
+#define OFFSET 4096   // subtracted after the int cast in particle_map(); NOTE(review): presumably already included in PPPM::shift - confirm
+
+/* ---------------------------------------------------------------------- */
+
+PPPMTIP4P::PPPMTIP4P(int narg, char **arg) : PPPM(narg, arg) {}   // forwards both arguments to PPPM; sets no state of its own
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array 
+------------------------------------------------------------------------- */
+
+void PPPMTIP4P::particle_map()
+{
+  int hyd1,hyd2;
+  double msite[3];
+
+  int *type = atom->type;
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+  const double xlo = domain->boxxlo;
+  const double ylo = domain->boxylo;
+  const double zlo = domain->boxzlo;
+
+  int nbad = 0;
+  for (int i = 0; i < nlocal; i++) {
+
+    // charge site is the M point for O atoms, the atom position otherwise
+
+    double *site = x[i];
+    if (type[i] == typeO) {
+      find_M(i,hyd1,hyd2,msite);
+      site = msite;
+    }
+
+    // (ix,iy,iz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    const int ix = static_cast<int> ((site[0]-xlo)*delxinv+shift) - OFFSET;
+    const int iy = static_cast<int> ((site[1]-ylo)*delyinv+shift) - OFFSET;
+    const int iz = static_cast<int> ((site[2]-zlo)*delzinv+shift) - OFFSET;
+
+    int *cell = part2grid[i];
+    cell[0] = ix;
+    cell[1] = iy;
+    cell[2] = iz;
+
+    // count particles whose stencil around ix,iy,iz leaves my 3d brick
+
+    const bool fits =
+      ix+nlower >= nxlo_out && ix+nupper <= nxhi_out &&
+      iy+nlower >= nylo_out && iy+nupper <= nyhi_out &&
+      iz+nlower >= nzlo_out && iz+nupper <= nzhi_out;
+    if (!fits) nbad++;
+  }
+
+  // any out-of-range particle on any proc is an error on all procs
+
+  int nbad_all;
+  MPI_Allreduce(&nbad,&nbad_all,1,MPI_INT,MPI_SUM,world);
+  if (nbad_all) error->all("Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid 
+------------------------------------------------------------------------- */
+
+void PPPMTIP4P::make_rho()
+{
+  int iH1,iH2;
+  double xM[3];
+
+  // clear 3d density array: ngrid values starting at the lowest corner
+
+  double *cells = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (int c = 0; c < ngrid; c++) cells[c] = 0.0;
+
+  int *type = atom->type;
+  double *q = atom->q;
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+  const double boxxlo = domain->boxxlo;
+  const double boxylo = domain->boxylo;
+  const double boxzlo = domain->boxzlo;
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // O atoms deposit their charge at the M site
+
+    double *xi = x[i];
+    if (type[i] == typeO) {
+      find_M(i,iH1,iH2,xM);
+      xi = xM;
+    }
+
+    const int nx = part2grid[i][0];
+    const int ny = part2grid[i][1];
+    const int nz = part2grid[i][2];
+    const double dx = nx+shiftone - (xi[0]-boxxlo)*delxinv;
+    const double dy = ny+shiftone - (xi[1]-boxylo)*delyinv;
+    const double dz = nz+shiftone - (xi[2]-boxzlo)*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    // weight of each stencil point is the product of the three 1d weights
+
+    const double z0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const double y0 = z0*rho1d[2][n];
+      double **plane = density_brick[n+nz];
+      for (int m = nlower; m <= nupper; m++) {
+        const double x0 = y0*rho1d[1][m];
+        double *row = plane[m+ny];
+        for (int l = nlower; l <= nupper; l++)
+          row[l+nx] += x0*rho1d[0][l];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles 
+------------------------------------------------------------------------- */
+
+// Interpolate the electric field from the grid to each of my charge sites
+// and add the resulting force to atom->f.
+// For atoms of type typeO the field is evaluated at the M site from find_M()
+// and the force is split between the O atom (1-2*alpha) and its two H atoms
+// (alpha each); all other atoms receive the force directly.
+
+void PPPMTIP4P::fieldforce()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  double dx,dy,dz,x0,y0,z0;
+  double ek[3];
+  double *xi;
+  int iH1,iH2;
+  double xM[3];
+  double fx,fy,fz;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double boxxlo = domain->boxxlo;
+  double boxylo = domain->boxylo;
+  double boxzlo = domain->boxzlo;
+
+  for (i = 0; i < nlocal; i++) {
+
+    // for an O atom this also sets iH1,iH2, which are reused below
+
+    if (type[i] == typeO) {
+      find_M(i,iH1,iH2,xM);
+      xi = xM;
+    } else xi = x[i];
+
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (xi[0]-boxxlo)*delxinv;
+    dy = ny+shiftone - (xi[1]-boxylo)*delyinv;
+    dz = nz+shiftone - (xi[2]-boxzlo)*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    ek[0] = ek[1] = ek[2] = 0.0;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];
+          ek[0] -= x0*vdx_brick[mz][my][mx];
+          ek[1] -= x0*vdy_brick[mz][my][mx];
+          ek[2] -= x0*vdz_brick[mz][my][mx];
+        }
+      }
+    }
+
+    // convert E-field to force
+
+    fx = qqrd2e * q[i] * ek[0];
+    fy = qqrd2e * q[i] * ek[1];
+    fz = qqrd2e * q[i] * ek[2];
+
+    if (type[i] != typeO) {
+      f[i][0] += fx;
+      f[i][1] += fy;
+      f[i][2] += fz;
+    } else {
+
+      // iH1,iH2 still hold the H atoms found at the top of this iteration,
+      // so find_M() is not called a second time
+
+      f[i][0] += fx*(1.0-2.0*alpha);
+      f[i][1] += fy*(1.0-2.0*alpha);
+      f[i][2] += fz*(1.0-2.0*alpha);
+
+      f[iH1][0] += alpha*(fx);
+      f[iH1][1] += alpha*(fy);
+      f[iH1][2] += alpha*(fz);
+
+      f[iH2][0] += alpha*(fx);
+      f[iH2][1] += alpha*(fy);
+      f[iH2][2] += alpha*(fz);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  find 2 H atoms bonded to O atom i
+  compute position xM of fictitious charge site for O atom
+  also return local indices iH1,iH2 of H atoms
+------------------------------------------------------------------------- */
+
+void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
+{
+  // the two H atoms are looked up by the two tags that follow the O atom's tag
+
+  iH1 = atom->map(atom->tag[i] + 1);
+  iH2 = atom->map(atom->tag[i] + 2);
+
+  const bool missingH = (iH1 == -1) || (iH2 == -1);
+  if (missingH) error->one("TIP4P hydrogen is missing");
+
+  const bool bothAreH =
+    (atom->type[iH1] == typeH) && (atom->type[iH2] == typeH);
+  if (!bothAreH) error->one("TIP4P hydrogen has incorrect atom type");
+
+  double **x = atom->x;
+  double *xO = x[i];
+  int d;
+
+  // O->H1 separation, passed through domain->minimum_image()
+
+  double toH1[3];
+  for (d = 0; d < 3; d++) toH1[d] = x[iH1][d] - xO[d];
+  domain->minimum_image(&toH1[0],&toH1[1],&toH1[2]);
+
+  // O->H2 separation, passed through domain->minimum_image()
+
+  double toH2[3];
+  for (d = 0; d < 3; d++) toH2[d] = x[iH2][d] - xO[d];
+  domain->minimum_image(&toH2[0],&toH2[1],&toH2[2]);
+
+  // charge site = O position shifted by alpha times the sum of both O->H vectors
+
+  for (d = 0; d < 3; d++) xM[d] = xO[d] + alpha * (toH1[d] + toH2[d]);
+}
--- a/src/KSPACE/pppm_tip4p.h
+++ b/src/KSPACE/pppm_tip4p.h
@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef PPPM_TIP4P_H
+#define PPPM_TIP4P_H
+
+#include "pppm.h"
+
+class PPPMTIP4P : public PPPM {   // PPPM variant that places each O atom's charge at the TIP4P M site
+ public:
+  PPPMTIP4P(int, char **);        // arguments are passed straight to PPPM
+
+ private:
+  void particle_map();            // overrides of the three PPPM virtuals; each uses find_M() for atoms of type typeO
+  void make_rho();
+  void fieldforce();
+
+  void find_M(int, int &, int &, double *);   // (O atom index, out: H1 index, out: H2 index, out: M site position)
+};
+
+#endif
--- a/src/KSPACE/remap.cpp
+++ b/src/KSPACE/remap.cpp
@ -0,0 +1,506 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "remap.h"
+#include "pack.h"
+
+// the whole expansion is parenthesized so the macros can be used inside
+// larger expressions, e.g. 2*MAX(a,b)
+// each argument may still be evaluated twice: do not pass expressions
+// with side effects
+#define MIN(A,B) (((A) < (B)) ? (A) : (B))
+#define MAX(A,B) (((A) > (B)) ? (A) : (B))
+
+/* ----------------------------------------------------------------------
+   Data layout for 3d remaps:
+
+   data set of Nfast x Nmid x Nslow elements is owned by P procs
+   each element = nqty contiguous datums
+   on input, each proc owns a subsection of the elements
+   on output, each proc will own a (presumably different) subsection
+   my subsection must not overlap with any other proc's subsection,
+     i.e. the union of all proc's input (or output) subsections must
+     exactly tile the global Nfast x Nmid x Nslow data set
+   when called from C, all subsection indices are 
+     C-style from 0 to N-1 where N = Nfast or Nmid or Nslow
+   when called from F77, all subsection indices are 
+     F77-style from 1 to N where N = Nfast or Nmid or Nslow
+   a proc can own 0 elements on input or output
+     by specifying hi index < lo index
+   on both input and output, data is stored contiguously on a processor
+     with a fast-varying, mid-varying, and slow-varying index
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Perform 3d remap 
+
+   Arguments:
+   in           starting address of input data on this proc
+   out          starting address of where output data for this proc
+                  will be placed (can be same as in)
+   buf          extra memory required for remap
+                if memory=0 was used in call to remap_3d_create_plan
+		  then buf must be big enough to hold output result
+		  i.e. nqty * (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * 
+		              (out_khi-out_klo+1)
+		if memory=1 was used in call to remap_3d_create_plan
+		  then buf is not used, can just be a dummy pointer
+   plan         plan returned by previous call to remap_3d_create_plan
+------------------------------------------------------------------------- */
+
+void remap_3d(double *in, double *out, double *buf,
+              struct remap_plan_3d *plan)
+
+{
+  // receive area = caller's buf if the plan was created with memory=0,
+  // else the scratch space stored in the plan
+
+  double *recvbuf = (plan->memory == 0) ? buf : plan->scratch;
+
+  // post a non-blocking recv for every message from another proc
+
+  for (int m = 0; m < plan->nrecv; m++) {
+    MPI_Irecv(&recvbuf[plan->recv_bufloc[m]],plan->recv_size[m],
+              MPI_DOUBLE,plan->recv_proc[m],0,
+              plan->comm,&plan->request[m]);
+  }
+
+  // pack each outgoing piece into the shared send buffer and send it
+
+  for (int m = 0; m < plan->nsend; m++) {
+    plan->pack(&in[plan->send_offset[m]],plan->sendbuf,&plan->packplan[m]);
+    MPI_Send(plan->sendbuf,plan->send_size[m],MPI_DOUBLE,
+             plan->send_proc[m],0,plan->comm);
+  }
+
+  // self data goes in -> receive area -> out without any MPI call
+  // its plan entries are stored right after the remote ones,
+  // i.e. at index nsend (send side) and nrecv (recv side)
+
+  if (plan->self) {
+    const int ks = plan->nsend;
+    const int kr = plan->nrecv;
+    double *stage = &recvbuf[plan->recv_bufloc[kr]];
+    plan->pack(&in[plan->send_offset[ks]],stage,&plan->packplan[ks]);
+    plan->unpack(stage,&out[plan->recv_offset[kr]],&plan->unpackplan[kr]);
+  }
+
+  // unpack the remote messages in whatever order they complete
+
+  for (int n = 0; n < plan->nrecv; n++) {
+    int done;
+    MPI_Status status;
+    MPI_Waitany(plan->nrecv,plan->request,&done,&status);
+    plan->unpack(&recvbuf[plan->recv_bufloc[done]],
+                 &out[plan->recv_offset[done]],&plan->unpackplan[done]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Discard a partially built remap plan after an allocation failure
+   frees the gathered extent array, every plan array allocated so far,
+     and the plan itself
+   relies on all pointer fields having been set to NULL right after the
+     plan was allocated, since free(NULL) is a no-op
+   does not free plan->comm, so only use it before MPI_Comm_dup is called
+------------------------------------------------------------------------- */
+
+static void remap_3d_discard_plan(struct remap_plan_3d *plan,
+                                  struct extent_3d *array)
+{
+  free(array);
+
+  free(plan->send_offset);
+  free(plan->send_size);
+  free(plan->send_proc);
+  free(plan->packplan);
+  free(plan->sendbuf);
+
+  free(plan->recv_offset);
+  free(plan->recv_size);
+  free(plan->recv_proc);
+  free(plan->recv_bufloc);
+  free(plan->request);
+  free(plan->unpackplan);
+  free(plan->scratch);
+
+  free(plan);
+}
+
+/* ----------------------------------------------------------------------
+   Create plan for performing a 3d remap 
+
+   Arguments:
+   comm                 MPI communicator for the P procs which own the data
+   in_ilo,in_ihi        input bounds of data I own in fast index
+   in_jlo,in_jhi        input bounds of data I own in mid index
+   in_klo,in_khi        input bounds of data I own in slow index
+   out_ilo,out_ihi      output bounds of data I own in fast index
+   out_jlo,out_jhi      output bounds of data I own in mid index
+   out_klo,out_khi      output bounds of data I own in slow index
+   nqty                 # of datums per element
+   permute              permutation in storage order of indices on output
+                          0 = no permutation
+                          1 = permute once = mid->fast, slow->mid, fast->slow
+                          2 = permute twice = slow->fast, fast->mid, mid->slow
+   memory               user provides buffer memory for remap or system does
+                          0 = user provides memory
+                          1 = system provides memory
+   precision            precision of data
+                          1 = single precision (4 bytes per datum),
+                              not supported: NULL is returned
+                          2 = double precision (8 bytes per datum)
+
+   Returns:
+   pointer to the new plan, to be released with remap_3d_destroy_plan
+   NULL if precision = 1 or if a memory allocation fails, in which case
+     everything allocated up to that point has been freed
+------------------------------------------------------------------------- */
+
+struct remap_plan_3d *remap_3d_create_plan(
+       MPI_Comm comm,
+       int in_ilo, int in_ihi, int in_jlo, int in_jhi,
+       int in_klo, int in_khi,
+       int out_ilo, int out_ihi, int out_jlo, int out_jhi,
+       int out_klo, int out_khi,
+       int nqty, int permute, int memory, int precision)
+
+{
+  struct remap_plan_3d *plan;
+  struct extent_3d *array;
+  struct extent_3d in,out,overlap;
+  int i,iproc,nsend,nrecv,ibuf,size,me,nprocs;
+
+  // query MPI info 
+
+  MPI_Comm_rank(comm,&me);
+  MPI_Comm_size(comm,&nprocs);
+
+  // single precision not yet supported 
+  // nothing below needs a single precision case because of this return
+
+  if (precision == 1) {
+    if (me == 0) printf("Single precision not supported\n");
+    return NULL;
+  }
+
+  // allocate memory for plan data struct 
+
+  plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
+  if (plan == NULL) return NULL;
+
+  // start with all pointer fields NULL so that a failure at any later
+  // step can free exactly what has been allocated so far
+
+  plan->sendbuf = NULL;
+  plan->scratch = NULL;
+  plan->pack = NULL;
+  plan->unpack = NULL;
+  plan->send_offset = NULL;
+  plan->send_size = NULL;
+  plan->send_proc = NULL;
+  plan->packplan = NULL;
+  plan->recv_offset = NULL;
+  plan->recv_size = NULL;
+  plan->recv_proc = NULL;
+  plan->recv_bufloc = NULL;
+  plan->request = NULL;
+  plan->unpackplan = NULL;
+
+  // store parameters in local data structs 
+
+  in.ilo = in_ilo;
+  in.ihi = in_ihi;
+  in.isize = in.ihi - in.ilo + 1;
+
+  in.jlo = in_jlo;
+  in.jhi = in_jhi;
+  in.jsize = in.jhi - in.jlo + 1;
+
+  in.klo = in_klo;
+  in.khi = in_khi;
+  in.ksize = in.khi - in.klo + 1;
+
+  out.ilo = out_ilo;
+  out.ihi = out_ihi;
+  out.isize = out.ihi - out.ilo + 1;
+
+  out.jlo = out_jlo;
+  out.jhi = out_jhi;
+  out.jsize = out.jhi - out.jlo + 1;
+
+  out.klo = out_klo;
+  out.khi = out_khi;
+  out.ksize = out.khi - out.klo + 1;
+
+  // combine output extents across all procs 
+
+  array = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d));
+  if (array == NULL) {
+    remap_3d_discard_plan(plan,NULL);
+    return NULL;
+  }
+
+  MPI_Allgather(&out,sizeof(struct extent_3d),MPI_BYTE,
+                array,sizeof(struct extent_3d),MPI_BYTE,comm);
+
+  // count send collides, including self 
+
+  nsend = 0;
+  iproc = me;
+  for (i = 0; i < nprocs; i++) {
+    iproc++;
+    if (iproc == nprocs) iproc = 0;
+    nsend += remap_3d_collide(&in,&array[iproc],&overlap);
+  }
+
+  // malloc space for send info 
+
+  if (nsend) {
+    plan->pack = pack_3d;
+
+    plan->send_offset = (int *) malloc(nsend*sizeof(int));
+    plan->send_size = (int *) malloc(nsend*sizeof(int));
+    plan->send_proc = (int *) malloc(nsend*sizeof(int));
+    plan->packplan = (struct pack_plan_3d *) 
+      malloc(nsend*sizeof(struct pack_plan_3d));
+
+    if (plan->send_offset == NULL || plan->send_size == NULL || 
+        plan->send_proc == NULL || plan->packplan == NULL) {
+      remap_3d_discard_plan(plan,array);
+      return NULL;
+    }
+  }
+
+  // store send info, with self as last entry 
+  // procs are visited starting at me+1 and wrapping around, so me is last
+
+  nsend = 0;
+  iproc = me;
+  for (i = 0; i < nprocs; i++) {
+    iproc++;
+    if (iproc == nprocs) iproc = 0;
+    if (remap_3d_collide(&in,&array[iproc],&overlap)) {
+      plan->send_proc[nsend] = iproc;
+      plan->send_offset[nsend] = nqty * 
+        ((overlap.klo-in.klo)*in.jsize*in.isize + 
+        ((overlap.jlo-in.jlo)*in.isize + overlap.ilo-in.ilo));
+      plan->packplan[nsend].nfast = nqty*overlap.isize;
+      plan->packplan[nsend].nmid = overlap.jsize;
+      plan->packplan[nsend].nslow = overlap.ksize;
+      plan->packplan[nsend].nstride_line = nqty*in.isize;
+      plan->packplan[nsend].nstride_plane = nqty*in.jsize*in.isize;
+      plan->packplan[nsend].nqty = nqty;
+      plan->send_size[nsend] = nqty*overlap.isize*overlap.jsize*overlap.ksize;
+      nsend++;
+    }
+  }
+
+  // plan->nsend = # of sends not including self 
+
+  if (nsend && plan->send_proc[nsend-1] == me)
+    plan->nsend = nsend - 1;
+  else
+    plan->nsend = nsend;
+
+  // combine input extents across all procs 
+
+  MPI_Allgather(&in,sizeof(struct extent_3d),MPI_BYTE,
+                array,sizeof(struct extent_3d),MPI_BYTE,comm);
+
+  // count recv collides, including self 
+
+  nrecv = 0;
+  iproc = me;
+  for (i = 0; i < nprocs; i++) {
+    iproc++;
+    if (iproc == nprocs) iproc = 0;
+    nrecv += remap_3d_collide(&out,&array[iproc],&overlap);
+  }
+  
+  // malloc space for recv info 
+
+  if (nrecv) {
+
+    // pick unpack routine by permutation and # of datums per element
+    // unpack stays NULL for an unrecognized precision or permute value
+
+    if (precision == 2) {
+      if (permute == 0)
+        plan->unpack = unpack_3d;
+      else if (permute == 1) {
+        if (nqty == 1)
+          plan->unpack = unpack_3d_permute1_1;
+        else if (nqty == 2)
+          plan->unpack = unpack_3d_permute1_2;
+        else
+          plan->unpack = unpack_3d_permute1_n;
+      }
+      else if (permute == 2) {
+        if (nqty == 1)
+          plan->unpack = unpack_3d_permute2_1;
+        else if (nqty == 2)
+          plan->unpack = unpack_3d_permute2_2;
+        else
+          plan->unpack = unpack_3d_permute2_n;
+      }
+    }
+
+    plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
+    plan->recv_size = (int *) malloc(nrecv*sizeof(int));
+    plan->recv_proc = (int *) malloc(nrecv*sizeof(int));
+    plan->recv_bufloc = (int *) malloc(nrecv*sizeof(int));
+    plan->request = (MPI_Request *) malloc(nrecv*sizeof(MPI_Request));
+    plan->unpackplan = (struct pack_plan_3d *) 
+      malloc(nrecv*sizeof(struct pack_plan_3d));
+
+    if (plan->recv_offset == NULL || plan->recv_size == NULL || 
+        plan->recv_proc == NULL || plan->recv_bufloc == NULL ||
+        plan->request == NULL || plan->unpackplan == NULL) {
+      remap_3d_discard_plan(plan,array);
+      return NULL;
+    }
+  }
+
+  // store recv info, with self as last entry 
+  // ibuf = running offset of each message within the receive buffer
+
+  ibuf = 0;
+  nrecv = 0;
+  iproc = me;
+
+  for (i = 0; i < nprocs; i++) {
+    iproc++;
+    if (iproc == nprocs) iproc = 0;
+    if (remap_3d_collide(&out,&array[iproc],&overlap)) {
+      plan->recv_proc[nrecv] = iproc;
+      plan->recv_bufloc[nrecv] = ibuf;
+
+      if (permute == 0) {
+        plan->recv_offset[nrecv] = nqty *
+          ((overlap.klo-out.klo)*out.jsize*out.isize +
+           (overlap.jlo-out.jlo)*out.isize + (overlap.ilo-out.ilo));
+        plan->unpackplan[nrecv].nfast = nqty*overlap.isize;
+        plan->unpackplan[nrecv].nmid = overlap.jsize;
+        plan->unpackplan[nrecv].nslow = overlap.ksize;
+        plan->unpackplan[nrecv].nstride_line = nqty*out.isize;
+        plan->unpackplan[nrecv].nstride_plane = nqty*out.jsize*out.isize;
+        plan->unpackplan[nrecv].nqty = nqty;
+      }
+      else if (permute == 1) {
+        plan->recv_offset[nrecv] = nqty *
+          ((overlap.ilo-out.ilo)*out.ksize*out.jsize +
+           (overlap.klo-out.klo)*out.jsize + (overlap.jlo-out.jlo));
+        plan->unpackplan[nrecv].nfast = overlap.isize;
+        plan->unpackplan[nrecv].nmid = overlap.jsize;
+        plan->unpackplan[nrecv].nslow = overlap.ksize;
+        plan->unpackplan[nrecv].nstride_line = nqty*out.jsize;
+        plan->unpackplan[nrecv].nstride_plane = nqty*out.ksize*out.jsize;
+        plan->unpackplan[nrecv].nqty = nqty;
+      }
+      else {
+        plan->recv_offset[nrecv] = nqty *
+          ((overlap.jlo-out.jlo)*out.isize*out.ksize +
+           (overlap.ilo-out.ilo)*out.ksize + (overlap.klo-out.klo));
+        plan->unpackplan[nrecv].nfast = overlap.isize;
+        plan->unpackplan[nrecv].nmid = overlap.jsize;
+        plan->unpackplan[nrecv].nslow = overlap.ksize;
+        plan->unpackplan[nrecv].nstride_line = nqty*out.ksize;
+        plan->unpackplan[nrecv].nstride_plane = nqty*out.isize*out.ksize;
+        plan->unpackplan[nrecv].nqty = nqty;
+      }
+
+      plan->recv_size[nrecv] = nqty*overlap.isize*overlap.jsize*overlap.ksize;
+      ibuf += plan->recv_size[nrecv];
+      nrecv++;
+    }
+  }
+
+  // plan->nrecv = # of recvs not including self 
+
+  if (nrecv && plan->recv_proc[nrecv-1] == me)
+    plan->nrecv = nrecv - 1;
+  else
+    plan->nrecv = nrecv;
+
+  // init remaining fields in remap plan 
+
+  plan->memory = memory;
+
+  if (nrecv == plan->nrecv)
+    plan->self = 0;
+  else
+    plan->self = 1;
+
+  // free locally malloced space 
+  // reset the pointer so a later failure does not free it a second time
+
+  free(array);
+  array = NULL;
+
+  // find biggest send message (not including self) and malloc space for it 
+
+  size = 0;
+  for (nsend = 0; nsend < plan->nsend; nsend++)
+    size = MAX(size,plan->send_size[nsend]);
+
+  if (size) {
+    plan->sendbuf = (double *) malloc(size*sizeof(double));
+    if (plan->sendbuf == NULL) {
+      remap_3d_discard_plan(plan,array);
+      return NULL;
+    }
+  }
+
+  // if requested, allocate internal scratch space for recvs,
+  // only need it if I will receive any data (including self) 
+
+  if (memory == 1 && nrecv > 0) {
+    plan->scratch =
+      (double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double));
+    if (plan->scratch == NULL) {
+      remap_3d_discard_plan(plan,array);
+      return NULL;
+    }
+  }
+
+  // create new MPI communicator for remap 
+
+  MPI_Comm_dup(comm,&plan->comm);
+
+  // return pointer to plan 
+
+  return plan;
+}
+
+/* ----------------------------------------------------------------------
+   Destroy a 3d remap plan 
+------------------------------------------------------------------------- */
+
+void remap_3d_destroy_plan(struct remap_plan_3d *plan)
+
+{
+  // the send-side (recv-side) arrays are only freed if the plan holds
+  // at least one send (recv), counting the copy to self
+
+  const int has_send = plan->nsend || plan->self;
+  const int has_recv = plan->nrecv || plan->self;
+
+  // release the communicator stored in the plan
+
+  MPI_Comm_free(&plan->comm);
+
+  // release send-side arrays; free() ignores a NULL sendbuf
+
+  if (has_send) {
+    free(plan->send_offset);
+    free(plan->send_size);
+    free(plan->send_proc);
+    free(plan->packplan);
+    free(plan->sendbuf);
+  }
+
+  // release recv-side arrays; free() ignores a NULL scratch
+
+  if (has_recv) {
+    free(plan->recv_offset);
+    free(plan->recv_size);
+    free(plan->recv_proc);
+    free(plan->recv_bufloc);
+    free(plan->request);
+    free(plan->unpackplan);
+    free(plan->scratch);
+  }
+
+  // finally release the plan struct
+
+  free(plan);
+}
+
+/* ----------------------------------------------------------------------
+   collide 2 sets of indices to determine overlap 
+   compare bounds of block1 with block2 to see if they overlap
+   return 1 if they do and put bounds of overlapping section in overlap
+   return 0 if they do not overlap 
+------------------------------------------------------------------------- */
+
+int remap_3d_collide(struct extent_3d *block1, struct extent_3d *block2,
+                     struct extent_3d *overlap)
+
+{
+  // candidate overlap = larger of the two lo bounds, smaller of the two hi
+  // bounds, in each dimension; always stored, even if the blocks are disjoint
+
+  const int ilo = (block1->ilo > block2->ilo) ? block1->ilo : block2->ilo;
+  const int ihi = (block1->ihi < block2->ihi) ? block1->ihi : block2->ihi;
+  const int jlo = (block1->jlo > block2->jlo) ? block1->jlo : block2->jlo;
+  const int jhi = (block1->jhi < block2->jhi) ? block1->jhi : block2->jhi;
+  const int klo = (block1->klo > block2->klo) ? block1->klo : block2->klo;
+  const int khi = (block1->khi < block2->khi) ? block1->khi : block2->khi;
+
+  overlap->ilo = ilo;
+  overlap->ihi = ihi;
+  overlap->jlo = jlo;
+  overlap->jhi = jhi;
+  overlap->klo = klo;
+  overlap->khi = khi;
+
+  // blocks overlap only if the range is non-empty in all 3 dimensions
+  // sizes are filled in only in that case
+
+  if (ilo <= ihi && jlo <= jhi && klo <= khi) {
+    overlap->isize = ihi - ilo + 1;
+    overlap->jsize = jhi - jlo + 1;
+    overlap->ksize = khi - klo + 1;
+    return 1;
+  }
+
+  return 0;
+}
--- a/src/KSPACE/remap.h
+++ b/src/KSPACE/remap.h
@ -0,0 +1,56 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// details of how to do a 3d remap 
+// built by remap_3d_create_plan, used by remap_3d,
+// released by remap_3d_destroy_plan
+// in the per-message arrays the entry for data kept on this proc, if any,
+// is stored last, i.e. at index nsend (send side) or nrecv (recv side)
+
+struct remap_plan_3d {
+  double *sendbuf;                  // buffer for MPI sends 
+  double *scratch;                  // scratch buffer for MPI recvs 
+  void (*pack)(double *, double *, struct pack_plan_3d *);
+                                    // which pack function to use 
+  void (*unpack)(double *, double *, struct pack_plan_3d *);
+                                    // which unpack function to use 
+  int *send_offset;                 // extraction loc for each send 
+  int *send_size;                   // size of each send message 
+  int *send_proc;                   // proc to send each message to 
+  struct pack_plan_3d *packplan;    // pack plan for each send message 
+  int *recv_offset;                 // insertion loc for each recv 
+  int *recv_size;                   // size of each recv message 
+  int *recv_proc;                   // proc to recv each message from 
+  int *recv_bufloc;                 // offset in scratch buf for each recv 
+  MPI_Request *request;             // MPI request for each posted recv 
+  struct pack_plan_3d *unpackplan;  // unpack plan for each recv message 
+  int nrecv;                        // # of recvs from other procs 
+  int nsend;                        // # of sends to other procs 
+  int self;                         // whether I send/recv with myself 
+  int memory;                       // user provides scratch space or not 
+  MPI_Comm comm;                    // group of procs performing remap 
+};
+
+// collision between 2 regions 
+// inclusive lo/hi index bounds of a block in the fast (i), mid (j) and
+// slow (k) dimension, plus the count of indices in each: size = hi - lo + 1
+
+struct extent_3d {
+  int ilo,ihi,isize;
+  int jlo,jhi,jsize;
+  int klo,khi,ksize;
+};
+
+// function prototypes 
+// remap_3d: args = in, out, buf, plan
+// remap_3d_create_plan: args = comm, 6 input bounds, 6 output bounds,
+//   nqty, permute, memory, precision; returns NULL on failure
+// remap_3d_collide: args = block1, block2, overlap;
+//   returns 1 if the blocks overlap, else 0
+
+void remap_3d(double *, double *, double *, struct remap_plan_3d *);
+struct remap_plan_3d *remap_3d_create_plan(MPI_Comm, 
+  int, int, int, int, int, int,	int, int, int, int, int, int,
+  int, int, int, int);
+void remap_3d_destroy_plan(struct remap_plan_3d *);
+int remap_3d_collide(struct extent_3d *, 
+		     struct extent_3d *, struct extent_3d *);
--- a/src/KSPACE/remap_wrap.cpp
+++ b/src/KSPACE/remap_wrap.cpp
@ -0,0 +1,46 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "remap_wrap.h"
+#include "error.h"
+
+/* ----------------------------------------------------------------------
+   create the 3d remap plan
+   all arguments are passed unchanged to remap_3d_create_plan
+   a NULL plan is reported through error->one
+   NOTE(review): assumes error->one does not return - confirm in error.h
+------------------------------------------------------------------------- */
+
+Remap::Remap(MPI_Comm comm,
+	     int in_ilo, int in_ihi, int in_jlo, int in_jhi,
+	     int in_klo, int in_khi,
+	     int out_ilo, int out_ihi, int out_jlo, int out_jhi,
+	     int out_klo, int out_khi,
+	     int nqty, int permute, int memory, int precision)
+{
+  plan = remap_3d_create_plan(comm,
+			      in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
+			      out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
+			      nqty,permute,memory,precision);
+  if (plan == NULL) error->one("Could not create 3d remap plan");
+}
+
+/* ----------------------------------------------------------------------
+   release the plan created in the constructor
+   plan is passed on without a NULL check
+------------------------------------------------------------------------- */
+
+Remap::~Remap()
+{
+  remap_3d_destroy_plan(plan);
+}
+
+/* ----------------------------------------------------------------------
+   perform the remap described by the stored plan
+   in, out, buf are handed straight to remap_3d
+------------------------------------------------------------------------- */
+
+void Remap::perform(double *in, double *out, double *buf)
+{
+  remap_3d(in,out,buf,plan);
+}
--- a/src/KSPACE/remap_wrap.h
+++ b/src/KSPACE/remap_wrap.h
@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef REMAP_WRAP_H
+#define REMAP_WRAP_H
+
+#include "lammps.h"
+#include "remap.h"
+
+// C++ wrapper around the remap_3d functions declared in remap.h
+// holds one remap plan: created in the constructor, used by perform(),
+// destroyed in the destructor
+
+class Remap : public LAMMPS {
+ public:
+  // args: comm, 6 input bounds, 6 output bounds,
+  // nqty, permute, memory, precision (same order as remap_3d_create_plan)
+  Remap(MPI_Comm,int,int,int,int,int,int,
+	int,int,int,int,int,int,int,int,int,int);
+  ~Remap();
+  // args: in, out, buf (same as remap_3d)
+  void perform(double *, double *, double *);
+
+ private:
+  struct remap_plan_3d *plan;
+};
+
+#endif
--- a/src/KSPACE/style_kspace.h
+++ b/src/KSPACE/style_kspace.h
@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// each section below is compiled only if the including file has defined
+// the corresponding macro
+// NOTE(review): KSpaceStyle() and PairStyle() are defined outside this
+// file; presumably they map an input-script style name to a class - confirm
+
+// headers of the KSpace solver classes in this package
+
+#ifdef KSpaceInclude
+#include "ewald.h"
+#include "pppm.h"
+#include "pppm_tip4p.h"
+#endif
+
+// one entry per KSpace solver: style name, class name
+
+#ifdef KSpaceClass
+KSpaceStyle(ewald,Ewald)
+KSpaceStyle(pppm,PPPM)
+KSpaceStyle(pppm/tip4p,PPPMTIP4P)
+#endif
+
+// headers of the long-range Coulomb pair classes in this package
+
+#ifdef PairInclude
+#include "pair_buck_coul_long.h"
+#include "pair_lj_cut_coul_long.h"
+#include "pair_lj_cut_coul_long_tip4p.h"
+#include "pair_lj_charmm_coul_long.h"
+#endif
+
+// one entry per pair style: style name, class name
+
+#ifdef PairClass
+PairStyle(buck/coul/long,PairBuckCoulLong)
+PairStyle(lj/cut/coul/long,PairLJCutCoulLong)
+PairStyle(lj/cut/coul/long/tip4p,PairLJCutCoulLongTIP4P)
+PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong)
+#endif
--- a/src/MAKE/Makefile.altix
+++ b/src/MAKE/Makefile.altix
@ -0,0 +1,36 @@
+# altix = SGI Altix, Intel icc, MPI, FFTs from SGI SCSL library
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		icc
+CCFLAGS =	-O2 -DFFT_SCSL -w
+DEPFLAGS =	-M
+# one user needed icpc to link
+LINK =		icc
+LINKFLAGS =	-O2
+USRLIB =	
+SYSLIB =	-lmpi -lscs_mp
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
+
--- a/src/MAKE/Makefile.bgl
+++ b/src/MAKE/Makefile.bgl
@ -0,0 +1,42 @@
+# bgl = LLNL Blue Gene Light machine, xlC, native MPI, FFTW
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .u 
+.IGNORE:
+
+# System-specific settings
+
+CC =	        /opt/ibmcmp/vacpp/7.0/bin/blrts_xlC \
+               -I/bgl/BlueLight/ppcfloor/bglsys/include \
+               -I/bgl/local/bglfftwgel-2.1.5.pre5/include 
+CCFLAGS =       -O3 -DFFT_FFTW -DMPICH_IGNORE_CXX_SEEK
+DEPFLAGS =	-M
+LINK =	        /opt/ibmcmp/vacpp/7.0/bin/blrts_xlC
+LINKFLAGS =	-O3 -L/bgl/BlueLight/ppcfloor/bglsys/lib \
+                -L/opt/ibmcmp/xlf/9.1/blrts_lib \
+                -L/opt/ibmcmp/vacpp/7.0/blrts_lib \
+                -L/bgl/local/lib \
+                -L/bgl/local/bglfftwgel-2.1.5.pre5/lib
+USRLIB =	-lxlopt -lxlomp_ser -lxl -lxlfmath -lm -lfftw \
+                -lmpich.rts -lmsglayer.rts -lrts.rts -ldevices.rts -lmassv
+SYSLIB =
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.u:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) -c $< 
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.u)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.cheetah
+++ b/src/MAKE/Makefile.cheetah
@ -0,0 +1,48 @@
+# cheetah = ORNL IBM machine, mpCC, native MPI, FFTW
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .u
+.IGNORE:
+
+# System-specific settings
+
+CC =		mpCC_r
+CCFLAGS =	-O4 -qnoipa -I/usr/apps/include -DFFT_FFTW
+DEPFLAGS =	-M
+LINK =		mpCC_r
+LINKFLAGS =	-O -L/usr/apps/lib
+USRLIB =	-lfftw
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# --------- old section -------------
+
+# Compilation rules
+
+#.cpp.o:
+#	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+#$(OBJ):	     $(INC)
+
+# --------- new section -------------
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.u:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) -c $<
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.u)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.crockett
+++ b/src/MAKE/Makefile.crockett
@ -0,0 +1,30 @@
+# crockett = RedHat Linux box, mpiCC, LAM MPI, no FFTs
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		mpiCC
+CCFLAGS =	-g -O -DFFT_NONE -DGZIP
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-g -O
+USRLIB =
+SYSLIB =	
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+.cpp.o:
+	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+$(OBJ):	     $(INC)
--- a/src/MAKE/Makefile.cygwin
+++ b/src/MAKE/Makefile.cygwin
@ -0,0 +1,40 @@
+# cygwin = Windows Cygwin, g++, no MPI, no FFTs
+
+SHELL = /bin/sh
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-O -I../STUBS -DFFT_NONE
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-O -L../STUBS
+USRLIB =	-lmpi
+SYSLIB = 
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE).exe
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.debian
+++ b/src/MAKE/Makefile.debian
@ -0,0 +1,41 @@
+# debian = Debian, g++, MPICH, FFTW
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-g -O -I/usr/lib/mpich/include/ -DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-g -O 	-L/usr/lib/mpich/lib	
+USRLIB =	-lfftw -lmpich
+SYSLIB =
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.diesel
+++ b/src/MAKE/Makefile.diesel
@ -0,0 +1,39 @@
+# diesel = SGI Origin 350, 64-bit, SGI MIPSpro CC, SGI MPT, SGI SCSL MP FFTs
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		CC
+CCFLAGS =	-64 -O -mp -DFFT_SCSL
+DEPFLAGS =	-M
+LINK =		CC
+LINKFLAGS =	-64
+USRLIB =	
+SYSLIB =	-lm -lscs_mp -lmpi -lmpi++
+# archiver settings used by the "lib" target below
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.fink
+++ b/src/MAKE/Makefile.fink
@ -0,0 +1,35 @@
+# fink = Mac OS-X w/ fink installed libraries, c++, no MPI, FFTW 2.1.5
+
+SHELL = /bin/sh
+
+# System-specific settings
+
+CC =		c++
+CCFLAGS =	-O -I../STUBS -I/sw/include -DFFT_FFTW
+DEPFLAGS =	-M
+LINK =		c++
+LINKFLAGS =	-O -L../STUBS -L/sw/lib
+USRLIB =	-lfftw -lmpi
+SYSLIB = 
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
+
--- a/src/MAKE/Makefile.g++
+++ b/src/MAKE/Makefile.g++
@ -0,0 +1,43 @@
+# g++ = RedHat Linux box, g++, MPICH, FFTW
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-g -O -I/home/sjplimp/tools/mpich/include \
+		-I/home/sjplimp/tools/fftw/include -DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-g -O -L/home/sjplimp/tools/mpich/lib \
+		-L/home/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpich
+SYSLIB =
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.g++_poems
+++ b/src/MAKE/Makefile.g++_poems
@ -0,0 +1,45 @@
+# g++_poems = RedHat Linux box, g++, MPICH, FFTW, POEMS library
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-g -O -I/home/sjplimp/tools/mpich/include \
+		-I/home/sjplimp/lammps/lib/poems \
+		-I/home/sjplimp/tools/fftw/include -DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-g -O -L/home/sjplimp/tools/mpich/lib \
+		-L/home/sjplimp/lammps/lib/poems \
+		-L/home/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpich -lpoems
+SYSLIB =
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.liberty
+++ b/src/MAKE/Makefile.liberty
@ -0,0 +1,36 @@
+# liberty = HP cluster with dual 3.0 GHz Xeons, mpiCC, native MPI, FFTW
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+FFTW =		/apps/libraries/fftw/icc
+
+CC =		mpiCC
+CCFLAGS =	-O -DFFT_FFTW -I${FFTW}/fftw
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-O -L${FFTW}/fftw/.libs
+USRLIB =	-lfftw
+SYSLIB =	-lstdc++ -lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.liberty_poems
+++ b/src/MAKE/Makefile.liberty_poems
@ -0,0 +1,36 @@
+# liberty_poems = HP clust w/ dual Xeons, mpiCC, native MPI, FFTW, POEMS lib
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+FFTW =		/apps/libraries/fftw/icc
+
+CC =		mpiCC
+CCFLAGS =	-O -DFFT_FFTW -I${FFTW}/fftw -I/home/sjplimp/lammps/lib/poems
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-O -L${FFTW}/fftw/.libs -L/home/sjplimp/lammps/lib/poems
+USRLIB =	-lfftw -lpoems
+SYSLIB =	-lstdc++ -lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.linux
+++ b/src/MAKE/Makefile.linux
@ -0,0 +1,43 @@
+# linux = RedHat Linux box, Intel icc, MPICH, FFTW
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		icc
+CCFLAGS =	-O -I/home/sjplimp/tools/mpich/include \
+		-I/home/sjplimp/tools/fftw/include -DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		icc
+LINKFLAGS =	-O -L/home/sjplimp/tools/mpich/lib \
+		-L/home/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpich
+SYSLIB =	-lcxa -lunwind -lstdc++
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.linux_poems
+++ b/src/MAKE/Makefile.linux_poems
@ -0,0 +1,45 @@
+# linux_poems = RedHat Linux box, Intel icc, MPICH, FFTW, POEMS library
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		icc
+CCFLAGS =	-O -I/home/sjplimp/tools/mpich/include \
+		-I/home/sjplimp/lammps/lib/poems \
+		-I/home/sjplimp/tools/fftw/include -DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		icc
+LINKFLAGS =	-O -L/home/sjplimp/tools/mpich/lib \
+		-L/home/sjplimp/lammps/lib/poems \
+		-L/home/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpich -lpoems
+SYSLIB =	-lstdc++ -lcxa -lunwind
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.mac
+++ b/src/MAKE/Makefile.mac
@ -0,0 +1,34 @@
+# mac = Apple PowerBook G4 laptop, c++, no MPI, FFTW 2.1.5
+
+SHELL = /bin/sh
+
+# System-specific settings
+
+CC =		c++
+CCFLAGS =	-O -I../STUBS -I/Users/sjplimp/tools/fftw/include -DFFT_FFTW
+DEPFLAGS =	-M
+LINK =		c++
+LINKFLAGS =	-O -L../STUBS -L/Users/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpi
+SYSLIB = 
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.odin
+++ b/src/MAKE/Makefile.odin
@ -0,0 +1,41 @@
+# odin = 1400 cluster, g++, MPICH, no FFTs
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-O -I/opt/mpich-mx/include -DFFT_NONE -DGZIP
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-O -L/opt/mpich-mx/lib -L/opt/mx/lib
+USRLIB =	-lmpich -lmyriexpress
+SYSLIB =
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.ross
+++ b/src/MAKE/Makefile.ross
@ -0,0 +1,34 @@
+# ross = CPlant cluster (compile on taylor), c++, native MPI, DEC FFTs
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+CC =		/usr/local/cplant/ross/current/bin/c++
+CCFLAGS =	-O -DFFT_DEC
+DEPFLAGS =	-M
+LINK =		/usr/local/cplant/ross/current/bin/c++
+LINKFLAGS =	-O
+USRLIB =	-lmpi -lcxml
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.seaborg
+++ b/src/MAKE/Makefile.seaborg
@ -0,0 +1,49 @@
+# seaborg = NERSC IBM machine, mpCC, native MPI, FFTW
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .u
+.IGNORE:
+
+# System-specific settings
+
+CC =		mpCC_r
+CCFLAGS =	-O2 -qnoipa -I/usr/common/usg/fftw/2.1.5/include -DFFT_FFTW  
+DEPFLAGS =	-M
+LINK =		mpCC_r
+LINKFLAGS =	-O -L/usr/lib -L/usr/common/usg/fftw/2.1.5/lib
+USRLIB =	-lfftw -lfftw_mpi
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# --------- old section -------------
+
+# Compilation rules
+
+#.cpp.o:
+#	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+#$(OBJ):	     $(INC)
+
+# --------- new section -------------
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule for .u files, which serve as the per-source dependency files
+# output is not redirected here; presumably the compiler writes the .u file
+# itself when given $(DEPFLAGS) -- TODO confirm for mpCC_r
+%.u:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) -c $<
+
+# Individual dependencies
+
+# DEPENDS names one .u file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.u)
+include $(DEPENDS)
+
--- a/src/MAKE/Makefile.serial
+++ b/src/MAKE/Makefile.serial
@ -0,0 +1,40 @@
+# serial = RedHat Linux box, g++, no MPI, no FFTs
+
+SHELL = /bin/sh
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-O -I../STUBS -DFFT_NONE
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-O -L../STUBS
+USRLIB =	-lmpi
+SYSLIB = 
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.spirit
+++ b/src/MAKE/Makefile.spirit
@ -0,0 +1,36 @@
+# spirit = HP cluster with dual 3.4 GHz EM64T (64 bit), mpiCC, native MPI, FFTW
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+FFTW =		/apps/libraries/fftw/nwcc
+
+CC =		mpiCC
+CCFLAGS =	-O -DFFT_FFTW -I${FFTW}/include
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-O -L${FFTW}/lib
+USRLIB =	-lfftw -lstdc++
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.squall
+++ b/src/MAKE/Makefile.squall
@ -0,0 +1,34 @@
+# squall = Red Squall (compile on reddish), pgCC, native MPI, FFTW
+
+SHELL = /bin/sh
+
+# System-specific settings
+
+CC =		mpiCC
+CCFLAGS =	-fastsse -tp k8-64 -DGZIP -DFFT_FFTW \
+		-I/home/sjplimp/tools/fftw/include
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-O -L/home/sjplimp/tools/fftw/lib
+USRLIB =	-lfftw -lmpi
+SYSLIB =	
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.storm
+++ b/src/MAKE/Makefile.storm
@ -0,0 +1,37 @@
+# storm = Cray Red Storm, Cray mpicxx, native MPI, FFTW
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .d
+.IGNORE:
+
+# System-specific settings
+
+CC =		CC
+CCFLAGS =	-fastsse -DFFT_FFTW -DMPICH_IGNORE_CXX_SEEK \
+		-I/projects/fftw/fftw-2.1.5/include
+DEPFLAGS =	-M
+LINK =		CC
+LINKFLAGS =	-O -L/projects/fftw/fftw-2.1.5/lib
+USRLIB =	-lfftw
+SYSLIB =
+# archiver and its flags, used by the "lib" target below
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+.cpp.o:
+	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+$(OBJ):	     $(INC)
--- a/src/MAKE/Makefile.storm_poems
+++ b/src/MAKE/Makefile.storm_poems
@ -0,0 +1,39 @@
+# storm_poems = Cray Red Storm, Cray mpicxx, native MPI, FFTW, POEMS library
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .d
+.IGNORE:
+
+# System-specific settings
+
+CC =		CC
+CCFLAGS =	-fastsse -DFFT_FFTW -DMPICH_IGNORE_CXX_SEEK \
+		-I/projects/fftw/fftw-2.1.5/include \
+		-I/home/sjplimp/lammps/lib/poems
+DEPFLAGS =	-M
+LINK =		CC
+LINKFLAGS =	-O -L/projects/fftw/fftw-2.1.5/lib \
+		-L/home/sjplimp/lammps/lib/poems
+USRLIB =	-lfftw -lpoems
+SYSLIB =
+# archiver and its flags, used by the "lib" target below
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+.cpp.o:
+	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+$(OBJ):	     $(INC)
--- a/src/MAKE/Makefile.tbird
+++ b/src/MAKE/Makefile.tbird
@ -0,0 +1,36 @@
+# tbird = Dell cluster with dual 3.6 GHz Xeons, Intel mpicxx, native MPI, FFTW
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+FFTW =		/apps/libraries/fftw/nwcc
+
+CC =		mpicxx
+CCFLAGS =	-O -DFFT_FFTW -I${FFTW}/include
+DEPFLAGS =	-M
+LINK =		mpicxx
+LINKFLAGS =	-O -L${FFTW}/lib
+USRLIB =	-lfftw -lstdc++
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.tesla
+++ b/src/MAKE/Makefile.tesla
@ -0,0 +1,35 @@
+# tesla = 16-proc SGI Onyx3, g++, no MPI, SGI FFTs
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		g++
+CCFLAGS =	-O -I../STUBS -DFFT_SGI
+DEPFLAGS =	-M
+LINK =		g++
+LINKFLAGS =	-O -L../STUBS
+USRLIB =	-lmpi
+SYSLIB =	-lm -lcomplib.sgimath
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
+
--- a/src/MAKE/Makefile.tflop
+++ b/src/MAKE/Makefile.tflop
@ -0,0 +1,31 @@
+# tflop = Intel Tflops (compile on sasn100), ciCC, native MPI, Intel FFTs
+
+SHELL = /bin/sh
+.SUFFIXES: .cpp .d
+.IGNORE:
+
+# System-specific settings
+
+CC =		ciCC
+CCFLAGS =	-O4 -Knoieee -DFFT_INTEL
+DEPFLAGS =	-M
+LINK =		ciCC
+LINKFLAGS =	-Knoieee
+USRLIB =	-lmpi -lkmath
+SYSLIB =
+SIZE =		xsize
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# suffix rule: compile each .cpp source into the matching .o object
+.cpp.o:
+	$(CC) $(CCFLAGS) -c $<
+
+# Individual dependencies
+
+# every object in $(OBJ) lists all of $(INC) as prerequisites, so a change
+# to any file in $(INC) rebuilds every object
+$(OBJ):	     $(INC)
--- a/src/MAKE/Makefile.valor
+++ b/src/MAKE/Makefile.valor
@ -0,0 +1,36 @@
+# valor = HP cluster with dual Xeons, mpiCC, native MPI, FFTW
+
+SHELL = /bin/sh
+.IGNORE:
+
+# System-specific settings
+
+FFTW =		/apps/libraries/fftw-2.1.5
+
+CC =		mpiCC
+CCFLAGS =	-O -DFFT_FFTW -I${FFTW}/include
+DEPFLAGS =	-M
+LINK =		mpiCC
+LINKFLAGS =	-O -L${FFTW}/lib
+USRLIB =	-lfftw -lstdc++
+SYSLIB =	-lm
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Compilation rules
+
+# pattern rule: compile each .cpp source into the matching .o object
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+# pattern rule: create a .d file for each .cpp source by running the
+# compiler with $(DEPFLAGS) and redirecting its standard output to $@
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+# DEPENDS names one .d file per object in $(OBJ); all of them are included
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Makefile.ydl
+++ b/src/MAKE/Makefile.ydl
@ -0,0 +1,42 @@
+# ydl = Yellow Dog Linux box, xlc -q64, MPICH, FFTW
+
+SHELL = /bin/sh
+#.IGNORE:
+
+# System-specific settings
+
+CC =		xlc -q64
+CCFLAGS =	-g -O -I/opt/mpich/include  \
+		-I/usr/local/include -L/opt/mpich/lib64 \
+		-DFFT_FFTW -DGZIP
+DEPFLAGS =	-M
+LINK =		xlc -q64
+LINKFLAGS =	-g -O -L/opt/mpich/lib64 \
+		-L/usr/local/lib -lstdc++ -lc
+USRLIB =	-lfftw -lmpich
+SYSLIB =
+# archiver and its flags, used by the "lib" target below
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SIZE =		size
+
+# Link rule
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(OBJ) $(USRLIB) $(SYSLIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library target
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+include $(DEPENDS)
--- a/src/MAKE/Windows/erfc.cpp
+++ b/src/MAKE/Windows/erfc.cpp
@ -0,0 +1,51 @@
+//This code was written by Philip Nicoletti
+//http://www.codeguru.com/forum/archive/index.php/t-129990.html
+//
+//Modified by Jin Ma, Oklahoma State University for LAMMPS
+//erfc() is defined in GNU libraries. This code is a simplified
+//version for implementation with Visual C++.
+//
+//Warning: these functions are not fully tested.
+//
+#include "erfc.h"
+#include "math.h"
+
+double erf(double x)
+{
+    //
+    // The error function is obtained from its complement:
+    // erf(x) = 1 - erfc(x).
+    //
+    const double complement = erfc(x);
+    return 1-complement;
+}
+
+//
+//
+double erfc(double x)
+{
+    //
+    // Complementary error function erfc(x), evaluated with the
+    // Chebyshev fit given in Numerical Recipes 2nd ed. on p. 214
+    // (W.H.Press et al.).
+    //
+    // The fractional error is always less than 1.2e-7.
+    //
+    // Fit parameters: coeff[0] is the constant term of the exponent,
+    // coeff[k] multiplies t^k for k = 1..9.
+    //
+    static const double coeff[10] = {
+	-1.26551223,  1.00002368,  0.37409196,  0.09678418, -0.18628806,
+	 0.27886807, -1.13520398,  1.48851587, -0.82215223,  0.17087277
+    };
+    //
+    const double z = fabs(x);
+    if (z == 0) return 1; // erfc(0)=1
+    //
+    const double t = 1/(1+0.5*z);
+    //
+    // Horner evaluation, from the highest-order parameter down to coeff[1]
+    //
+    double poly = coeff[9];
+    for (int k = 8; k >= 1; k--) poly = coeff[k] + t*poly;
+    //
+    const double v = t*exp((-z*z) + coeff[0] + t*poly);
+    return (x < 0) ? 2-v : v; // erfc(-x)=2-erfc(x)
+}
--- a/src/MAKE/Windows/erfc.h
+++ b/src/MAKE/Windows/erfc.h
@ -0,0 +1,5 @@
+//
+// Declarations of the erf()/erfc() functions implemented in erfc.cpp
+// (a simplified version for use with Visual C++)
+//
+
+// error function, computed in erfc.cpp as 1 - erfc(x)
+double erf(double x);
+
+// complementary error function (Chebyshev fit)
+double erfc(double x);
--- a/src/MAKE/Windows/notes.1
+++ b/src/MAKE/Windows/notes.1
@ -0,0 +1,52 @@
+Compiling LAMMPS under MS Windows:
+
+Tips from Jin Ma at Oklahoma State University
+jin.ma@okstate.edu
+November 20, 2004
+
+compiled without MPI and FFT in Visual C++ 6.0
+
+-------------------
+
+0. Create an empty workspace (Win32 console), add all .h and .cpp
+files into the project.
+
+1. At about 80 places in the code, variables are redefined.  Most of
+these variables are loop counters, which can be easily fixed.
+
+Code looks like this:
+
+   for (int i=0; i<5; i++) {
+	something;
+   }
+   for (int i=0; i<5; i++) {
+	something else;
+   }
+
+This is ok with g++ compiler. But VC thinks the i is redefined in the
+second loop. So the variable scope is different. This happens many times
+in the code. It can be fixed easily based on the compiling error.
+
+2. At the beginning of fft3d.h, added:
+#ifndef FFT_NONE
+#define FFT_NONE
+#endif
+
+3. In input.cpp, changed the two header files
+//#include "unistd.h"  
+#include "direct.h" 
+
+4. Added mpi.h and mpi.cpp (in STUBS folder) to the workspace
+In mpi.cpp, commented the time.h header file
+//#include <sys/time.h>	
+commented the original code in MPI_Wtime(), just make it return 0;
+
+5. In system.cpp, two changes due to difference in the input argument
+list
+Line 82: int iarg = 2;
+Line 171: 	inflag=1;	
+
+The number of input arguments (nargs) is different in g++ and VC when
+you give arguments to run a program. This might be related to MPI as
+well. The difference is one. Once the above changes are made, the
+program is taking the correct argument.
--- a/src/MAKE/Windows/notes.2
+++ b/src/MAKE/Windows/notes.2
@ -0,0 +1,128 @@
+/*
+//These are instructions for modifying LAMMPS for MS Windows
+//LAMMPS version: Jan 2005
+//
+
+compiled without MPI and FFT in Visual C++ 6.0
+(All packages except for XTC appear to work.)
+-------------------
+
+1. Create an empty workspace (Win32 console), add all .h and .cpp
+files into the project.
+
+2. At about 80 places in the code, variables are redefined.  Most of
+these variables are loop counters, which can be easily fixed.
+
+Code looks like this:
+
+   for (int i=0; i<5; i++) {
+	something;
+   }
+   for (int i=0; i<5; i++) {
+	something else;
+   }
+
+This is ok with g++ compiler. But VC thinks the i is redefined in the
+second loop. So the variable scope is different. This happens many times
+in the code. It can be fixed easily based on the compiling error.
+
+3. At the beginning of fft3d.h, added:
+#ifndef FFT_NONE
+#define FFT_NONE
+#endif
+
+4. In input.cpp, changed the two header files
+//#include "unistd.h"  
+#include "direct.h" 
+
+4A. (added by Tim Lau, MIT, ttl@mit.edu)
+
+In variable.cpp, change the header files
+//#include "unistd.h"
+#include "direct.h"
+#include "windows.h"
+
+Change usleep(100000) to Sleep(100)
+
+Note that the value is divided by 1000 since usleep takes in 
+microseconds while Sleep takes in milliseconds.
+
+4B. (added by Tim Lau, MIT, ttl@mit.edu)
+
+In shell.cpp, change the header file:
+//#include "unistd.h"
+#include "direct.h"
+
+Change the line in shell.cpp:
+mkdir(arg[i], S_IRWXU | S_IRGRP | S_IXGRP);
+to:
+mkdir(arg[i]);
+since Windows obviously does not use UNIX file permissions.
+
+It's also possible that the line has to be changed to:
+_mkdir(arg[i]);
+depending on the version of the Visual C++ compiler used.
+
+5. Added mpi.h and mpi.cpp (in STUBS folder) to the workspace
+In mpi.cpp, commented the time.h header file
+//#include <sys/time.h>	
+commented the original code in MPI_Wtime(), just make it return 0;
+
+6. In system.cpp, two changes due to difference in the input argument
+list
+
+Line 83:	int iarg = 2;
+Line 172: 	inflag=1;	//add this line
+
+The number of input arguments (nargs) is different in g++ and VC when
+you give arguments to run a program. This might be related to MPI as
+well. The difference is one. Once the above changes are made, the
+program is taking the correct argument.
+
+However, it has been observed in the latest versions of system.cpp that
+no modification needs be made to the file as distributed from the
+LAMMPS website to work. The user however, instead of starting LAMMPS
+by the command:
+
+lammps in.file
+
+as he would if he implemented the changes detailed here, would launch
+in the Unix style:
+
+lammps < in.file
+
+7. The new version LAMMPS calls the error function:
+   double erfc(double)
+   This function is in the GNU C library. However, it's not found for
+   VC++. 
+   Three options: 
+   a. One can try to find erfc() from other libraries.
+   b. The erfc() is called for pair_modify table option. One can set 
+   the table option to be 0 to avoid calling this function.
+   c. Write your own functions.
+
+   In this code, two files erfc.h, erfc.cpp are created and added to the project.
+   Files that call erfc() all add 
+	#include "erfc.h" at the beginning.
+   Note: the functions are not fully tested, use with caution. 
+
+8. MSVC does not have an inttypes.h file. The simplest way 
+   to deal with this problem is to download inttypes.h from the 
+   following site: 
+   http://www.koders.com/c/fidDE7D6EFFD475FAB1B7F6A2BBA791401CFA88FFA3.aspx 
+   and add this file into the workspace.
+
+9. MSVC does not have dirent.h. The problem is solved by downloading 
+a version of it for Windows from the following website:
+
+http://www.softagalleria.net/dirent/index.en.html
+
+10. Build the project. Specify appropriate input file to run the code. 
+   The Windows result might be different from Unix results. Be Cautious.
+
+---------------------------------------------------------
+  Jin Ma
+  Email: jin.ma@okstate.edu
+  Oklahoma State University
+  March 7, 2005
+---------------------------------------------------------
--- a/src/MAKE/Windows/notes.3
+++ b/src/MAKE/Windows/notes.3
@ -0,0 +1,33 @@
+Using MPI and FFTW with LAMMPS under Windows
+from Timothy Lau <ttl@MIT.EDU>
+(the referenced step #'s refer to the notes.2 document)
+
+-------
+
+If the user would like to use FFT with LAMMPS, he can download the source code
+for FFTW 2.1.5 and dump all the files into the same directory as LAMMPS. Then
+he can add to the project all the .c and .h files of FFTW as though those were
+LAMMPS files.  Instead of following step 3 of the instructions, however, the
+following should be added to fft3d.h:
+
+#ifndef FFT_FFTW
+#define FFT_FFTW
+#endif
+
+The user must take care to check for a Visual Studio compile that the "WIN32"
+variable is defined although it is likely that Visual Studio would
+automatically define this. Refer to line 137 of fftw.h that comes with FFTW
+2.1.5.
+
+If the user would like to use MPI with his Microsoft Visual Studio compile for
+use on a multicore processor or for use on a Windows cluster, it has been
+observed that MPICH 2 (at least the IA32 version) is known to compile with
+LAMMPS in Visual Studio. Instead of following step 5 of the instructions, the
+user could add the MPICH2\include as an additional include directory for MSVS
+to find "mpi.h" and also add the MPICH2\lib as an additional link directory. He
+should add mpi.lib to be specifically linked to.
+
+-------
+
+To compile LAMMPS with MPI-2 (e.g. MPICH 2) on Windows, you need
+to use the MPICH_IGNORE_CXX_SEEK preprocessor definition.
--- a/src/MANYBODY/Install.csh
+++ b/src/MANYBODY/Install.csh
@ -0,0 +1,30 @@
+# Install/unInstall package classes in LAMMPS
+#   first argument = 1: copy this package's files into the parent directory
+#   first argument = 0: remove this package's files from the parent directory
+
+# pair_eam.h must always be in src
+# (its cp and rm lines below are therefore commented out)
+
+if ($1 == 1) then
+
+  cp style_manybody.h ..
+
+  cp pair_eam.cpp ..
+  cp pair_eam_alloy.cpp ..
+  cp pair_eam_fs.cpp ..
+
+#  cp pair_eam.h ..
+  cp pair_eam_alloy.h ..
+  cp pair_eam_fs.h ..
+
+else if ($1 == 0) then
+
+# style_manybody.h is replaced by an empty file rather than just deleted
+  rm ../style_manybody.h
+  touch ../style_manybody.h
+
+  rm ../pair_eam.cpp
+  rm ../pair_eam_alloy.cpp
+  rm ../pair_eam_fs.cpp
+
+#  rm ../pair_eam.h
+  rm ../pair_eam_alloy.h
+  rm ../pair_eam_fs.h
+
+endif
--- a/src/MANYBODY/pair_eam.cpp
+++ b/src/MANYBODY/pair_eam.cpp
@ -0,0 +1,927 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_eam.h"
+#include "atom.h"
+#include "force.h"
+#include "update.h"
+#include "comm.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "error.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+#define MAXLINE 1024
+
+/* ----------------------------------------------------------------------
+   start with no tables and every owned array pointer set to NULL
+------------------------------------------------------------------------- */
+
+PairEAM::PairEAM()
+{
+  // no funcfl tables have been read yet
+
+  tables = NULL;
+  ntables = 0;
+
+  // per-atom arrays are sized on first use in compute()
+
+  rho = fp = NULL;
+  nmax = 0;
+
+  // NULL marks these arrays as unallocated for the destructor
+  // rhor is cleared as well so that memory deallocation will work
+  // even from derived classes that don't use rhor
+
+  frho = NULL;
+  rhor = NULL;
+  frho_0 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   release every array this class may own
+   each group is guarded, since the class can be destructed before
+   all of its arrays have been allocated
+------------------------------------------------------------------------- */
+
+PairEAM::~PairEAM()
+{
+  // per-atom arrays grown in compute()
+
+  memory->sfree(fp);
+  memory->sfree(rho);
+
+  // per-type-pair arrays created by allocate()
+
+  if (allocated) {
+    memory->destroy_2d_int_array(tabindex);
+    memory->destroy_2d_double_array(cutsq);
+    memory->destroy_2d_int_array(setflag);
+  }
+
+  // storage held by each table entry, then the table list itself
+
+  int m = 0;
+  while (m < ntables) {
+    delete [] tables[m].z2r;
+    delete [] tables[m].zr;
+    delete [] tables[m].rhor;
+    delete [] tables[m].frho;
+    delete [] tables[m].filename;
+    m++;
+  }
+  memory->sfree(tables);
+
+  // these four arrays are freed together, keyed on frho being set
+
+  if (frho != NULL) {
+    memory->destroy_3d_double_array(z2r);
+    memory->destroy_2d_double_array(zrtmp);
+    memory->destroy_2d_double_array(rhor);
+    memory->destroy_2d_double_array(frho);
+  }
+
+  // interpolation arrays, keyed on frho_0 being set
+
+  if (frho_0 != NULL) interpolate_deallocate();
+}
+
+/* ----------------------------------------------------------------------
+   compute EAM forces, and optionally energy and virial
+   eflag != 0: accumulate embedding and pair energy into eng_vdwl
+   vflag == 1: accumulate the virial pair by pair
+   vflag == 2: accumulate forces into update->f_pair, then virial_compute()
+   three passes over atoms 0..nlocal-1: density rho, then embedding
+     derivative fp, then forces
+------------------------------------------------------------------------- */
+
+void PairEAM::compute(int eflag, int vflag)
+{
+  int i,j,k,m,numneigh,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double rsq,r,p,fforce,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  int *neighs;
+  double **f;
+
+  // grow energy array if necessary
+  // old contents of rho and fp are discarded, both are refilled below
+
+  if (atom->nmax > nmax) {
+    memory->sfree(rho);
+    memory->sfree(fp);
+    nmax = atom->nmax;
+    rho = (double *) memory->smalloc(nmax*sizeof(double),"eam:rho");
+    fp = (double *) memory->smalloc(nmax*sizeof(double),"eam:fp");
+  }
+
+  eng_vdwl = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
+
+  // vflag == 2 writes forces to update->f_pair instead of atom->f
+
+  if (vflag == 2) f = update->f_pair;
+  else f = atom->f;
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // zero out density
+  // with newton_pair on, the nghost entries after nlocal are zeroed too,
+  // since the loop below also adds density to atoms j >= nlocal
+
+  if (newton_pair) {
+    m = nlocal + atom->nghost;
+    for (i = 0; i < m; i++) rho[i] = 0.0;
+  } else for (i = 0; i < nlocal; i++) rho[i] = 0.0;
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	// m = table interval for this r, capped at nr-1
+	// p = offset within that interval, capped at 1.0
+	p = sqrt(rsq)*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+	// cubic in p: density at i uses the type-j coefficients,
+	// density at j uses the type-i coefficients
+	rho[i] += ((rhor_3[jtype][m]*p + rhor_2[jtype][m])*p + 
+		   rhor_1[jtype][m])*p + rhor_0[jtype][m];
+	if (newton_pair || j < nlocal)
+	  rho[j] += ((rhor_3[itype][m]*p + rhor_2[itype][m])*p + 
+		     rhor_1[itype][m])*p + rhor_0[itype][m];
+      }
+    }
+  }
+
+  // communicate and sum densities
+
+  if (newton_pair) comm->reverse_comm_pair(this);
+
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // m is clamped to the range 1 to nrho-1, p is capped at 1.0
+
+  for (i = 0; i < nlocal; i++) {
+    itype = type[i];
+    p = rho[i]*rdrho + 1.0;
+    m = static_cast<int> (p);
+    m = MAX(1,MIN(m,nrho-1));
+    p -= m;
+    p = MIN(p,1.0);
+    fp[i] = (frho_6[itype][m]*p + frho_5[itype][m])*p + frho_4[itype][m];
+    if (eflag) {
+      phi = ((frho_3[itype][m]*p + frho_2[itype][m])*p + 
+	     frho_1[itype][m])*p + frho_0[itype][m];
+      eng_vdwl += phi;
+    }
+  }
+
+  // communicate derivative of embedding function
+
+  comm->comm_pair(this);
+
+  // compute forces on each atom
+  // loop over neighbors of my atoms
+
+  for (i = 0; i < nlocal; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    neighs = neighbor->firstneigh[i];
+    numneigh = neighbor->numneigh[i];
+
+    for (k = 0; k < numneigh; k++) {
+      j = neighs[k];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	r = sqrt(rsq);
+	// same interval index m and offset p as in the density loop
+	p = r*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+
+	// rhoip = derivative of (density at atom j due to atom i)
+	// rhojp = derivative of (density at atom i due to atom j)
+	// phi = pair potential energy
+	// phip = phi'
+	// z2 = phi * r
+	// z2p = (phi * r)' = (phi' r) + phi
+	// psip needs both fp[i] and fp[j] terms since r_ij appears in two
+	//   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+	//   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+	rhoip = (rhor_6[itype][m]*p + rhor_5[itype][m])*p + 
+	  rhor_4[itype][m];
+	rhojp = (rhor_6[jtype][m]*p + rhor_5[jtype][m])*p + 
+	  rhor_4[jtype][m];
+	z2 = ((z2r_3[itype][jtype][m]*p + z2r_2[itype][jtype][m])*p + 
+	      z2r_1[itype][jtype][m])*p + z2r_0[itype][jtype][m];
+	z2p = (z2r_6[itype][jtype][m]*p + z2r_5[itype][jtype][m])*p + 
+	  z2r_4[itype][jtype][m];
+
+	// fforce = psip/r, applied along (delx,dely,delz)
+	recip = 1.0/r;
+	phi = z2*recip;
+	phip = z2p*recip - phi*recip;
+	psip = fp[i]*rhojp + fp[j]*rhoip + phip;
+	fforce = psip*recip;
+	f[i][0] -= delx*fforce;
+	f[i][1] -= dely*fforce;
+	f[i][2] -= delz*fforce;
+	if (newton_pair || j < nlocal) {
+	  f[j][0] += delx*fforce;
+	  f[j][1] += dely*fforce;
+	  f[j][2] += delz*fforce;
+	}
+
+	// full phi when j is also updated here, otherwise half of it
+	// NOTE(review): presumably the other half is tallied by the
+	// processor that owns j -- confirm
+	if (eflag) {
+	  if (newton_pair || j < nlocal) eng_vdwl += phi;
+	  else eng_vdwl += 0.5*phi;
+	}
+
+	// pairwise virial uses the same full/half split as the energy
+	if (vflag == 1) {
+	  if (newton_pair || j < nlocal) {
+	    virial[0] -= delx*delx*fforce;
+	    virial[1] -= dely*dely*fforce;
+	    virial[2] -= delz*delz*fforce;
+	    virial[3] -= delx*dely*fforce;
+	    virial[4] -= delx*delz*fforce;
+	    virial[5] -= dely*delz*fforce;
+	  } else {
+	    virial[0] -= 0.5*delx*delx*fforce;
+	    virial[1] -= 0.5*dely*dely*fforce;
+	    virial[2] -= 0.5*delz*delz*fforce;
+	    virial[3] -= 0.5*delx*dely*fforce;
+	    virial[4] -= 0.5*delx*delz*fforce;
+	    virial[5] -= 0.5*dely*delz*fforce;
+	  }
+	}
+      }
+    }
+  }
+  if (vflag == 2) virial_compute();
+}
+
+/* ----------------------------------------------------------------------
+   create the per-type-pair arrays setflag, cutsq and tabindex
+   each is (ntypes+1) x (ntypes+1), types are indexed from 1
+------------------------------------------------------------------------- */
+
+void PairEAM::allocate()
+{
+  const int ntypes = atom->ntypes;
+  const int dim = ntypes + 1;
+
+  allocated = 1;
+
+  setflag = memory->create_2d_int_array(dim,dim,"pair:setflag");
+  cutsq = memory->create_2d_double_array(dim,dim,"pair:cutsq");
+  tabindex = memory->create_2d_int_array(dim,dim,"pair:tabindex");
+
+  // only the entries with jtype >= itype of setflag are cleared
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      setflag[itype][jtype] = 0;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   this pair style takes no arguments, any argument is an error
+------------------------------------------------------------------------- */
+
+void PairEAM::settings(int narg, char **arg)
+{
+  if (narg <= 0) return;
+  error->all("Illegal pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   args = I range, J range, funcfl file name
+   reading multiple funcfl files defines a funcfl alloy simulation
+------------------------------------------------------------------------- */
+
+void PairEAM::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  if (narg != 3) error->all("Incorrect args for pair coefficients");
+
+  // expand the two type arguments into inclusive ranges
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  // only i,i pairs are handled here:
+  //   a type qualifies when it lies in both the I range and the J range
+  // for each such type, read the funcfl file, set the mass of the
+  //   atom type from the table, and set setflag i,i
+
+  int nset = 0;
+  for (int itype = ilo; itype <= ihi; itype++) {
+    if (itype < jlo || itype > jhi) continue;
+
+    const int itable = read_funcfl(arg[2]);
+    atom->set_mass(itype,tables[itable].mass);
+    tabindex[itype][itype] = itable;
+    setflag[itype][itype] = 1;
+    nset++;
+  }
+
+  if (nset == 0) error->all("Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   return the single EAM cutoff, also stored in cutmax
+------------------------------------------------------------------------- */
+
+double PairEAM::init_one(int i, int j)
+{
+  // coeff() only flags diagonal pairs
+  // an i,j pair is usable only when both i,i and j,j have been set
+
+  const int both_set = (setflag[i][i] != 0) && (setflag[j][j] != 0);
+  if (!both_set) error->all("All EAM pair coeffs are not set");
+
+  // one global cutoff = largest cutoff of any table assigned to a set pair
+  // pairs with setflag = 0 are ignored (if hybrid, some may not be set)
+
+  const int ntypes = atom->ntypes;
+
+  cutmax = 0.0;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      if (setflag[itype][jtype] != 0) {
+        cutmax = MAX(cutmax,tables[tabindex[itype][jtype]].cut);
+      }
+    }
+  }
+
+  return cutmax;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   sets comm buffer sizes, builds the setfl-format arrays and their
+     spline coeffs, then caches the squared cutoff
+------------------------------------------------------------------------- */
+
+void PairEAM::init_style()
+{
+  // this style needs at least 1 value per atom in forward and reverse comm
+
+  if (comm->maxforward_pair < 1) comm->maxforward_pair = 1;
+  if (comm->maxreverse_pair < 1) comm->maxreverse_pair = 1;
+
+  // funcfl tables -> common-grid setfl arrays (with I,J mixing)
+  // then spline coeffs from those arrays
+
+  convert_funcfl();
+  interpolate();
+
+  cutforcesq = cutmax*cutmax;
+}
+
+/* ----------------------------------------------------------------------
+   read potential values from a single element EAM file
+   proc 0 reads values into a new table entry, then values are bcast
+   file = name of the potential file
+   return index in tables[] of the entry holding the data of this file
+   a file that was already read is not read again, its index is returned
+------------------------------------------------------------------------- */
+
+int PairEAM::read_funcfl(char *file)
+{
+  // check if same file has already been read
+  // every stored table is compared, not just the first one
+  // if yes, return index of table entry
+  // if no, extend table list
+
+  for (int i = 0; i < ntables; i++)
+    if (strcmp(file,tables[i].filename) == 0) return i;
+
+  tables = (Table *) 
+    memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables");
+
+  Table *tb = &tables[ntables];
+  int n = strlen(file) + 1;
+  tb->filename = new char[n];
+  strcpy(tb->filename,file);
+  tb->ith = tb->jth = 0;
+
+  // open potential file
+  // error string is sized from the file name so a long path cannot overflow
+
+  int me = comm->me;
+  FILE *fp = NULL;
+  char line[MAXLINE];
+
+  if (me == 0) {
+    fp = fopen(file,"r");
+    if (fp == NULL) {
+      char *str = new char[n + 64];
+      sprintf(str,"Cannot open EAM potential file %s",file);
+      error->one(str);
+      delete [] str;
+    }
+  }
+
+  // read and broadcast header
+  // line 1 is skipped, line 2 supplies the mass (2nd field),
+  //   line 3 supplies nrho,drho,nr,dr,cut
+  // a short or unparsable header is an error instead of silently
+  //   broadcasting uninitialized sizes
+
+  int tmp;
+  if (me == 0) {
+    int ok = (fgets(line,MAXLINE,fp) != NULL);
+    ok = ok && (fgets(line,MAXLINE,fp) != NULL);
+    ok = ok && (sscanf(line,"%d %lg",&tmp,&tb->mass) == 2);
+    ok = ok && (fgets(line,MAXLINE,fp) != NULL);
+    ok = ok && (sscanf(line,"%d %lg %d %lg %lg",
+                       &tb->nrho,&tb->drho,&tb->nr,&tb->dr,&tb->cut) == 5);
+    if (!ok) error->one("Invalid header in EAM potential file");
+  }
+
+  MPI_Bcast(&tb->mass,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&tb->nrho,1,MPI_INT,0,world);
+  MPI_Bcast(&tb->drho,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&tb->nr,1,MPI_INT,0,world);
+  MPI_Bcast(&tb->dr,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&tb->cut,1,MPI_DOUBLE,0,world);
+
+  // allocate potential arrays and read/bcast them
+  // arrays are filled starting at index 1, hence the +1 in each length
+  // set z2r to NULL (setfl array) so it can be deallocated
+
+  tb->frho = new double[tb->nrho+1];
+  tb->zr = new double[tb->nr+1];
+  tb->rhor = new double[tb->nr+1];
+  tb->z2r = NULL;
+
+  if (me == 0) grab(fp,tb->nrho,&tb->frho[1]);
+  MPI_Bcast(&tb->frho[1],tb->nrho,MPI_DOUBLE,0,world);
+
+  if (me == 0) grab(fp,tb->nr,&tb->zr[1]);
+  MPI_Bcast(&tb->zr[1],tb->nr,MPI_DOUBLE,0,world);
+
+  if (me == 0) grab(fp,tb->nr,&tb->rhor[1]);
+  MPI_Bcast(&tb->rhor[1],tb->nr,MPI_DOUBLE,0,world);
+
+  // close the potential file
+
+  if (me == 0) fclose(fp);
+
+  // new entry becomes the last table in the list
+
+  ntables++;
+  return ntables-1;
+}
+
+/* ----------------------------------------------------------------------
+   convert read-in funcfl potentials to multi-type setfl format
+   picks one common grid (nr,dr,nrho,drho) from all assigned tables,
+     then re-samples each table onto it with 4-point interpolation
+   fills frho,rhor (per type) and z2r (per type pair, i <= j)
+------------------------------------------------------------------------- */
+
+void PairEAM::convert_funcfl()
+{
+  int i,j,k,m;
+
+  int ntypes = atom->ntypes;
+
+  // determine max values for all i,i type pairs
+  // skip if setflag = 0 (if hybrid, some may not be set)
+
+  double rmax,rhomax;
+  dr = drho = rmax = rhomax = 0.0;
+
+  for (i = 1; i <= ntypes; i++) {
+    if (setflag[i][i] == 0) continue;
+    Table *tb = &tables[tabindex[i][i]];
+    dr = MAX(dr,tb->dr);
+    drho = MAX(drho,tb->drho);
+    rmax = MAX(rmax,(tb->nr-1) * tb->dr);
+    rhomax = MAX(rhomax,(tb->nrho-1) * tb->drho);
+  }
+
+  // set nr,nrho from cutoff and spacings
+  // 0.5 is for round-off in divide
+
+  nr = static_cast<int> (rmax/dr + 0.5);
+  nrho = static_cast<int> (rhomax/drho + 0.5);
+
+  // allocate multi-type setfl arrays
+  // arrays from a previous call are freed first
+
+  if (frho) {
+    memory->destroy_2d_double_array(frho);
+    memory->destroy_2d_double_array(rhor);
+    memory->destroy_2d_double_array(zrtmp);
+    memory->destroy_3d_double_array(z2r);
+  }
+
+  frho = (double **) 
+    memory->create_2d_double_array(ntypes+1,nrho+1,"eam:frho");
+  rhor = (double **)
+    memory->create_2d_double_array(ntypes+1,nr+1,"eam:rhor");
+  zrtmp = (double **)
+    memory->create_2d_double_array(ntypes+1,nr+1,"eam:zrtmp");
+  z2r = (double ***)
+    memory->create_3d_double_array(ntypes+1,ntypes+1,nr+1,"eam:z2r");
+
+  // interpolate all potentials to a single grid and cutoff for all atom types
+  // frho,rhor are 1:ntypes, z2r is 1:ntypes,1:ntypes
+  // skip if setflag i,i or j,j = 0 (if hybrid, some may not be set)
+
+  double r,p,cof1,cof2,cof3,cof4;
+
+  // embedding function: re-sample tb->frho onto the common rho grid
+  // k is clamped so the 4 source points k-1:k+2 stay inside 1:tb->nrho
+
+  for (i = 1; i <= ntypes; i++) {
+    if (setflag[i][i] == 0) continue;
+    Table *tb = &tables[tabindex[i][i]];
+    for (m = 1; m <= nrho; m++) {
+      r = (m-1)*drho;
+      p = r/tb->drho + 1.0;
+      k = static_cast<int> (p);
+      k = MIN(k,tb->nrho-2);
+      k = MAX(k,2);
+      p -= k;
+      p = MIN(p,2.0);
+      cof1 = -0.166666667*p*(p-1.0)*(p-2.0);
+      cof2 = 0.5*(p*p-1.0)*(p-2.0);
+      cof3 = -0.5*p*(p+1.0)*(p-2.0);
+      cof4 = 0.166666667*p*(p*p-1.0);
+      frho[i][m] = cof1*tb->frho[k-1] + cof2*tb->frho[k] + 
+        cof3*tb->frho[k+1] + cof4*tb->frho[k+2];
+    }
+  }
+
+  // density and z functions: re-sample tb->rhor and tb->zr onto the
+  //   common r grid with the same 4-point scheme
+
+  for (i = 1; i <= ntypes; i++) {
+    if (setflag[i][i] == 0) continue;
+    Table *tb = &tables[tabindex[i][i]];
+    for (m = 1; m <= nr; m++) {
+      r = (m-1)*dr;
+      p = r/tb->dr + 1.0;
+      k = static_cast<int> (p);
+      k = MIN(k,tb->nr-2);
+      k = MAX(k,2);
+      p -= k;
+      p = MIN(p,2.0);
+      cof1 = -0.166666667*p*(p-1.0)*(p-2.0);
+      cof2 = 0.5*(p*p-1.0)*(p-2.0);
+      cof3 = -0.5*p*(p+1.0)*(p-2.0);
+      cof4 = 0.166666667*p*(p*p-1.0);
+      rhor[i][m] = cof1*tb->rhor[k-1] + cof2*tb->rhor[k] +
+        cof3*tb->rhor[k+1] + cof4*tb->rhor[k+2];
+      zrtmp[i][m] = cof1*tb->zr[k-1] + cof2*tb->zr[k] +
+        cof3*tb->zr[k+1] + cof4*tb->zr[k+2];
+    }
+  }
+
+  // mix I,J: z2r = scaled product of the two per-type z functions
+  // NOTE(review): 27.2*0.529 looks like a Hartree->eV times
+  //   Bohr->Angstrom conversion, confirm against the funcfl file units
+
+  for (i = 1; i <= ntypes; i++)
+    for (j = i; j <= ntypes; j++) {
+      if (setflag[i][i] == 0 || setflag[j][j] == 0) continue;
+      for (m = 1; m <= nr; m++)
+        z2r[i][j][m] = 27.2*0.529 * zrtmp[i][m]*zrtmp[j][m];
+    }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate EAM potentials
+   build coeff arrays frho_0:6, rhor_0:6, z2r_0:6 from the tabulated
+     frho, rhor, z2r arrays on the common grid (nrho,drho,nr,dr)
+   _0 = tabulated value, _1 = finite-difference slope per grid step
+   _2,_3 = quadratic and cubic coeffs built from _0 and _1
+   _4,_5,_6 = _1, 2*_2, 3*_3 divided by the grid spacing
+------------------------------------------------------------------------- */
+
+void PairEAM::interpolate()
+{
+  // free memory from previous interpolation
+
+  if (frho_0) interpolate_deallocate();
+
+  // interpolation spacings
+
+  rdr = 1.0/dr;
+  rdrho = 1.0/drho;
+
+  // allocate coeff arrays
+
+  int n = atom->ntypes;
+
+  frho_0 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_0");
+  frho_1 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_1");
+  frho_2 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_2");
+  frho_3 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_3");
+  frho_4 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_4");
+  frho_5 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_5");
+  frho_6 = memory->create_2d_double_array(n+1,nrho+1,"eam:frho_6");
+
+  rhor_0 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_0");
+  rhor_1 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_1");
+  rhor_2 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_2");
+  rhor_3 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_3");
+  rhor_4 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_4");
+  rhor_5 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_5");
+  rhor_6 = memory->create_2d_double_array(n+1,nr+1,"eam:rhor_6");
+
+  z2r_0 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_0");
+  z2r_1 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_1");
+  z2r_2 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_2");
+  z2r_3 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_3");
+  z2r_4 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_4");
+  z2r_5 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_5");
+  z2r_6 = memory->create_3d_double_array(n+1,n+1,nr+1,"eam:z2r_6");
+
+  // frho interpolation for 1:ntypes
+  // skip if setflag = 0 (if hybrid, some may not be set)
+  // if skip, set frho arrays to 0.0, since they will still be accessed
+  //   for non-EAM atoms when compute() calculates embedding function
+  // only the row of the skipped type i is zeroed
+  //   zeroing every type here would wipe the coeffs already computed
+  //   for lower-numbered EAM types
+
+  int i,j,m;
+
+  for (i = 1; i <= atom->ntypes; i++) {
+    if (setflag[i][i] == 0) {
+      for (m = 1; m <= nrho; m++)
+        frho_0[i][m] = frho_1[i][m] = frho_2[i][m] = frho_3[i][m] =
+          frho_4[i][m] = frho_5[i][m] = frho_6[i][m] = 0.0;
+      continue;
+    }
+
+    for (m = 1; m <= nrho; m++) frho_0[i][m] = frho[i][m];
+
+    // slopes: one-sided at the 2 end points, centered 2-point next to
+    //   them, centered 4-point in the interior
+
+    frho_1[i][1] = frho_0[i][2]-frho_0[i][1];
+    frho_1[i][2] = 0.5*(frho_0[i][3]-frho_0[i][1]);
+    frho_1[i][nrho-1] = 0.5*(frho_0[i][nrho]-frho_0[i][nrho-2]);
+    frho_1[i][nrho] = frho_0[i][nrho]-frho_0[i][nrho-1];
+
+    for (m = 3; m <= nrho-2; m++)
+      frho_1[i][m] = ((frho_0[i][m-2]-frho_0[i][m+2]) + 
+                      8.0*(frho_0[i][m+1]-frho_0[i][m-1]))/12.0;
+
+    for (m = 1; m <= nrho-1; m++) {
+      frho_2[i][m] = 3.*(frho_0[i][m+1]-frho_0[i][m]) - 
+        2.0*frho_1[i][m] - frho_1[i][m+1];
+      frho_3[i][m] = frho_1[i][m] + frho_1[i][m+1] - 
+        2.0*(frho_0[i][m+1]-frho_0[i][m]);
+    }
+
+    frho_2[i][nrho] = 0.0;
+    frho_3[i][nrho] = 0.0;
+
+    // derivative coeffs, scaled from per-grid-step to per-unit-rho
+
+    for (m = 1; m <= nrho; m++) {
+      frho_4[i][m] = frho_1[i][m]/drho;
+      frho_5[i][m] = 2.0*frho_2[i][m]/drho;
+      frho_6[i][m] = 3.0*frho_3[i][m]/drho;
+    }
+  }
+
+  // rhor interpolation for 1:ntypes
+  // skip if setflag = 0 (if hybrid, some may not be set)
+  // unlike frho, the slope at the last grid point is set to 0.0
+
+  for (i = 1; i <= atom->ntypes; i++) {
+    if (setflag[i][i] == 0) continue;
+
+    for (m = 1; m <= nr; m++) rhor_0[i][m] = rhor[i][m];
+
+    rhor_1[i][1] = rhor_0[i][2]-rhor_0[i][1];
+    rhor_1[i][2] = 0.5*(rhor_0[i][3]-rhor_0[i][1]);
+    rhor_1[i][nr-1] = 0.5*(rhor_0[i][nr]-rhor_0[i][nr-2]);
+    rhor_1[i][nr] = 0.0;
+
+    for (m = 3; m <= nr-2; m++)
+      rhor_1[i][m] = ((rhor_0[i][m-2]-rhor_0[i][m+2]) + 
+                      8.0*(rhor_0[i][m+1]-rhor_0[i][m-1]))/12.;
+
+    for (m = 1; m <= nr-1; m++) {
+      rhor_2[i][m] = 3.0*(rhor_0[i][m+1]-rhor_0[i][m]) - 
+        2.0*rhor_1[i][m] - rhor_1[i][m+1];
+      rhor_3[i][m] = rhor_1[i][m] + rhor_1[i][m+1] - 
+        2.0*(rhor_0[i][m+1]-rhor_0[i][m]);
+    }
+
+    rhor_2[i][nr] = 0.0;
+    rhor_3[i][nr] = 0.0;
+
+    for (m = 1; m <= nr; m++) {
+      rhor_4[i][m] = rhor_1[i][m]/dr;
+      rhor_5[i][m] = 2.0*rhor_2[i][m]/dr;
+      rhor_6[i][m] = 3.0*rhor_3[i][m]/dr;
+    }
+  }
+
+  // z2r interpolation for 1:ntypes,1:ntypes
+  // skip if setflag i,i or j,j = 0 (if hybrid, some may not be set)
+  // set j,i coeffs = i,j coeffs
+
+  for (i = 1; i <= atom->ntypes; i++) {
+    for (j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][i] == 0 || setflag[j][j] == 0) continue;
+
+      for (m = 1; m <= nr; m++) z2r_0[i][j][m] = z2r[i][j][m];
+
+      z2r_1[i][j][1] = z2r_0[i][j][2]-z2r_0[i][j][1];
+      z2r_1[i][j][2] = 0.5*(z2r_0[i][j][3]-z2r_0[i][j][1]);
+      z2r_1[i][j][nr-1] = 0.5*(z2r_0[i][j][nr]-z2r_0[i][j][nr-2]);
+      z2r_1[i][j][nr] = 0.0;
+
+      for (m = 3; m <= nr-2; m++) 
+        z2r_1[i][j][m] = ((z2r_0[i][j][m-2]-z2r_0[i][j][m+2]) + 
+                          8.0*(z2r_0[i][j][m+1]-z2r_0[i][j][m-1]))/12.;
+
+      for (m = 1; m <= nr-1; m++) {
+        z2r_2[i][j][m] = 3.0*(z2r_0[i][j][m+1]-z2r_0[i][j][m]) - 
+          2.0*z2r_1[i][j][m] - z2r_1[i][j][m+1];
+        z2r_3[i][j][m] = z2r_1[i][j][m] + z2r_1[i][j][m+1] - 
+          2.0*(z2r_0[i][j][m+1]-z2r_0[i][j][m]);
+      }
+
+      z2r_2[i][j][nr] = 0.0;
+      z2r_3[i][j][nr] = 0.0;
+
+      for (m = 1; m <= nr; m++) {
+        z2r_4[i][j][m] = z2r_1[i][j][m]/dr;
+        z2r_5[i][j][m] = 2.0*z2r_2[i][j][m]/dr;
+        z2r_6[i][j][m] = 3.0*z2r_3[i][j][m]/dr;
+      }
+
+      // mirror into the j,i slot so lookups work in either type order
+
+      for (m = 1; m <= nr; m++) {
+        z2r_0[j][i][m] = z2r_0[i][j][m];
+        z2r_1[j][i][m] = z2r_1[i][j][m];
+        z2r_2[j][i][m] = z2r_2[i][j][m];
+        z2r_3[j][i][m] = z2r_3[i][j][m];
+        z2r_4[j][i][m] = z2r_4[i][j][m];
+        z2r_5[j][i][m] = z2r_5[i][j][m];
+        z2r_6[j][i][m] = z2r_6[i][j][m];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   grab n values from file fp and put them in list
+   values can be several to a line
+   blank lines are skipped
+   never stores more than n values, any extra values on the last line
+     read are discarded
+   error if the file ends before n values were found
+   only called by proc 0
+------------------------------------------------------------------------- */
+
+void PairEAM::grab(FILE *fp, int n, double *list)
+{
+  char *ptr;
+  char line[MAXLINE];
+
+  int i = 0;
+  while (i < n) {
+    if (fgets(line,MAXLINE,fp) == NULL) {
+      error->one("Unexpected end of EAM potential file");
+      return;
+    }
+
+    // ptr is NULL right away for a blank line
+    // i < n keeps writes inside the caller's array
+
+    ptr = strtok(line," \t\n\r\f");
+    while (ptr && i < n) {
+      list[i++] = atof(ptr);
+      ptr = strtok(NULL," \t\n\r\f");
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   skip n values from file fp
+   values can be several to a line
+   whole lines are consumed, blank lines contribute no values
+   error if the file ends before n values were found
+   only called by proc 0
+------------------------------------------------------------------------- */
+
+void PairEAM::skip(FILE *fp, int n)
+{
+  char *ptr;
+  char line[MAXLINE];
+
+  int i = 0;
+  while (i < n) {
+    if (fgets(line,MAXLINE,fp) == NULL) {
+      error->one("Unexpected end of EAM potential file");
+      return;
+    }
+
+    // count only tokens that are actually present on the line
+
+    ptr = strtok(line," \t\n\r\f");
+    while (ptr) {
+      i++;
+      ptr = strtok(NULL," \t\n\r\f");
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   deallocate spline interpolation arrays
+   frees frho_0:6 and rhor_0:6 (2d), then z2r_0:6 (3d)
+   the member pointers themselves are left unchanged
+------------------------------------------------------------------------- */
+
+void PairEAM::interpolate_deallocate()
+{
+  // gather the arrays in the order they are to be freed
+
+  double **coeff2d[] = {
+    frho_0,frho_1,frho_2,frho_3,frho_4,frho_5,frho_6,
+    rhor_0,rhor_1,rhor_2,rhor_3,rhor_4,rhor_5,rhor_6
+  };
+  double ***coeff3d[] = {
+    z2r_0,z2r_1,z2r_2,z2r_3,z2r_4,z2r_5,z2r_6
+  };
+
+  const int n2d = sizeof(coeff2d)/sizeof(coeff2d[0]);
+  const int n3d = sizeof(coeff3d)/sizeof(coeff3d[0]);
+
+  for (int k = 0; k < n2d; k++)
+    memory->destroy_2d_double_array(coeff2d[k]);
+
+  for (int k = 0; k < n3d; k++)
+    memory->destroy_3d_double_array(coeff3d[k]);
+}
+
+/* ----------------------------------------------------------------------
+   force (and optionally energy) for one pair of atoms i,j
+   rsq = squared separation, itype,jtype = atom types
+   factor_coul,factor_lj are not used
+   stores result in one.fforce, and if eflag is set the pair energy
+     in one.eng_vdwl with one.eng_coul = 0.0
+   uses fp[i],fp[j] as they are currently stored
+------------------------------------------------------------------------- */
+
+void PairEAM::single(int i, int j, int itype, int jtype,
+                     double rsq, double factor_coul, double factor_lj,
+                     int eflag, One &one)
+{
+  // locate the r-grid interval and the fractional offset within it
+
+  const double r = sqrt(rsq);
+  double frac = r*rdr + 1.0;
+  int bin = static_cast<int> (frac);
+  bin = MIN(bin,nr-1);
+  frac -= bin;
+  frac = MIN(frac,1.0);
+
+  // derivative of the density function of each type at r
+
+  const double rhoip =
+    (rhor_6[itype][bin]*frac + rhor_5[itype][bin])*frac + 
+    rhor_4[itype][bin];
+  const double rhojp =
+    (rhor_6[jtype][bin]*frac + rhor_5[jtype][bin])*frac + 
+    rhor_4[jtype][bin];
+
+  // z2 and its derivative for this type pair
+
+  const double z2 =
+    ((z2r_3[itype][jtype][bin]*frac + z2r_2[itype][jtype][bin])*frac + 
+     z2r_1[itype][jtype][bin])*frac + z2r_0[itype][jtype][bin];
+  const double z2p =
+    (z2r_6[itype][jtype][bin]*frac + z2r_5[itype][jtype][bin])*frac + 
+    z2r_4[itype][jtype][bin];
+
+  // phi = z2/r, phip = its derivative, psip = total derivative
+
+  const double recip = 1.0/r;
+  const double phi = z2*recip;
+  const double phip = z2p*recip - phi*recip;
+  const double psip = fp[i]*rhojp + fp[j]*rhoip + phip;
+
+  one.fforce = -psip*recip;
+
+  if (!eflag) return;
+
+  one.eng_vdwl = phi;
+  one.eng_coul = 0.0;
+}
+
+/* ----------------------------------------------------------------------
+   embedding function for one atom i of type itype, evaluated at rho[i]
+   fpi = returned derivative of the embedding function
+   phi = returned value of the embedding function, set only if eflag
+------------------------------------------------------------------------- */
+
+void PairEAM::single_embed(int i, int itype, double &fpi,
+                           int eflag, double &phi)
+{
+  // locate the rho-grid interval, clamped to 1:nrho-1, and the offset
+
+  double frac = rho[i]*rdrho + 1.0;
+  int bin = static_cast<int> (frac);
+  bin = MAX(1,MIN(bin,nrho-1));
+  frac -= bin;
+
+  fpi = (frho_6[itype][bin]*frac + frho_5[itype][bin])*frac + 
+    frho_4[itype][bin];
+
+  if (!eflag) return;
+
+  phi = ((frho_3[itype][bin]*frac + frho_2[itype][bin])*frac + 
+         frho_1[itype][bin])*frac + frho_0[itype][bin];
+}
+
+/* ----------------------------------------------------------------------
+   pack fp of the n atoms in list into buf, 1 value per atom
+   pbc_flags is not used
+   always returns 1
+------------------------------------------------------------------------- */
+
+int PairEAM::pack_comm(int n, int *list, double *buf, int *pbc_flags)
+{
+  for (int k = 0; k < n; k++) buf[k] = fp[list[k]];
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   unpack n values from buf into fp, starting at atom index first
+------------------------------------------------------------------------- */
+
+void PairEAM::unpack_comm(int n, int first, double *buf)
+{
+  for (int k = 0; k < n; k++) fp[first+k] = buf[k];
+}
+
+/* ----------------------------------------------------------------------
+   pack rho of n consecutive atoms, starting at index first, into buf
+   always returns 1
+------------------------------------------------------------------------- */
+
+int PairEAM::pack_reverse_comm(int n, int first, double *buf)
+{
+  for (int k = 0; k < n; k++) buf[k] = rho[first+k];
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   add the n values in buf to rho of the atoms in list
+   values are summed into rho, not overwritten
+------------------------------------------------------------------------- */
+
+void PairEAM::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  for (int k = 0; k < n; k++) rho[list[k]] += buf[k];
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays 
+   counted as 2 arrays of nmax doubles, returned in bytes
+------------------------------------------------------------------------- */
+
+int PairEAM::memory_usage()
+{
+  return 2 * nmax * sizeof(double);
+}
--- a/Show More
+++ b/Show More