diff --git a/src/Depend.sh b/src/Depend.sh
index 520d9ae2bf..0962dace51 100644
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -126,4 +126,5 @@ fi
 
 if (test $1 = "USER-REAXC") then
   depend KOKKOS
+  depend USER-OMP
 fi
diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp
new file mode 100644
index 0000000000..c8e5e3cb93
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_reax_omp.cpp
@@ -0,0 +1,1300 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Hasan Metin Aktulga, Purdue University
+   (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
+
+     Hybrid and sub-group capabilities: Ray Shan (Sandia)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "fix_qeq_reax_omp.h"
+#include "pair_reaxc_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "force.h"
+#include "group.h"
+#include "pair.h"
+#include "respa.h"
+#include "memory.h"
+#include "citeme.h"
+#include "error.h"
+#include "reaxc_defs.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+#define EV_TO_KCAL_PER_MOL 14.4
+//#define DANGER_ZONE     0.95
+//#define LOOSE_ZONE      0.7
+#define SQR(x) ((x)*(x))
+#define CUBE(x) ((x)*(x)*(x))
+#define MIN_NBRS 100
+
+/* ---------------------------------------------------------------------- */
+
+FixQEqReaxOMP::FixQEqReaxOMP(LAMMPS *lmp, int narg, char **arg) :
+  FixQEqReax(lmp, narg, arg)
+{
+  if (narg < 8 || narg > 9) error->all(FLERR,"Illegal fix qeq/reax/omp command");
+
+  nevery = force->inumeric(FLERR,arg[3]);
+  swa = force->numeric(FLERR,arg[4]);
+  swb = force->numeric(FLERR,arg[5]);
+  tolerance = force->numeric(FLERR,arg[6]);
+
+  // dual CG support
+  dual_enabled = 0;
+  if(narg == 9)
+    if(strcmp(arg[8],"dual") == 0) dual_enabled = 1;
+    else error->all(FLERR,"Unknown fix qeq/reax argument in location 9");
+
+  // perform initial allocation of atom-based arrays
+  // register with Atom class
+
+  s_hist = t_hist = NULL;
+  grow_arrays(atom->nmax);
+  atom->add_callback(0);
+  for( int i = 0; i < atom->nmax; i++ )
+    for (int j = 0; j < nprev; ++j )
+      s_hist[i][j] = t_hist[i][j] = 0;
+
+  reaxc = NULL;
+  reaxc = (PairReaxCOMP *) force->pair_match("reax/c/omp",1);
+
+  b_temp = NULL;
+
+  // ASPC: Kolafa, J. Comp. Chem., 25(3), 335 (2003)
+  do_aspc = 0;
+  aspc_order = 1;
+  aspc_order_max = nprev - 2; // Must be consistent with nprev to store history: nprev = aspc_order + 2
+  aspc_omega = 0.0;
+  aspc_b = NULL;
+}
+
+FixQEqReaxOMP::~FixQEqReaxOMP()
+{
+  memory->destroy(b_temp);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::post_constructor()
+{
+  pertype_parameters(pertype_option);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::pertype_parameters(char *arg)
+{
+  if (strcmp(arg,"reax/c") == 0) {
+    reaxflag = 1;
+    Pair *pair = force->pair_match("reax/c",0);
+    if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax/omp");
+    int tmp;
+    chi = (double *) pair->extract("chi",tmp);
+    eta = (double *) pair->extract("eta",tmp);
+    gamma = (double *) pair->extract("gamma",tmp);
+    if (chi == NULL || eta == NULL || gamma == NULL)
+      error->all(FLERR,
+                 "Fix qeq/reax/omp could not extract params from pair reax/c");
+    return;
+  }
+
+  int i,itype,ntypes;
+  double v1,v2,v3;
+  FILE *pf;
+
+  reaxflag = 0;
+  ntypes = atom->ntypes;
+
+  memory->create(chi,ntypes+1,"qeq/reax:chi");
+  memory->create(eta,ntypes+1,"qeq/reax:eta");
+  memory->create(gamma,ntypes+1,"qeq/reax:gamma");
+
+  if (comm->me == 0) {
+    if ((pf = fopen(arg,"r")) == NULL)
+      error->one(FLERR,"Fix qeq/reax/omp parameter file could not be found");
+
+    for (i = 1; i <= ntypes && !feof(pf); i++) {
+      fscanf(pf,"%d %lg %lg %lg",&itype,&v1,&v2,&v3);
+      if (itype < 1 || itype > ntypes)
+        error->one(FLERR,"Fix qeq/reax/omp invalid atom type in param file");
+      chi[itype] = v1;
+      eta[itype] = v2;
+      gamma[itype] = v3;
+    }
+    if (i <= ntypes) error->one(FLERR,"Invalid param file for fix qeq/reax/omp");
+    fclose(pf);
+  }
+
+  MPI_Bcast(&chi[1],ntypes,MPI_DOUBLE,0,world);
+  MPI_Bcast(&eta[1],ntypes,MPI_DOUBLE,0,world);
+  MPI_Bcast(&gamma[1],ntypes,MPI_DOUBLE,0,world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::allocate_storage()
+{
+  FixQEqReax::allocate_storage();
+
+  // dual CG support
+  int size = nmax;
+  if(dual_enabled) size*= 2;
+  memory->create(b_temp, comm->nthreads, size, "qeq/reax/omp:b_temp");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::deallocate_storage()
+{  
+  memory->destroy(b_temp);
+  
+  FixQEqReax::deallocate_storage();  
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::allocate_matrix()
+{
+  int i,ii,inum,m;
+  int *ilist, *numneigh;
+
+  int mincap;
+  double safezone;
+
+  if( reaxflag ) {
+    mincap = reaxc->system->mincap;
+    safezone = reaxc->system->safezone;
+  } else {
+    mincap = MIN_CAP;
+    safezone = SAFE_ZONE;
+  }
+
+  n = atom->nlocal;
+  n_cap = MAX( (int)(n * safezone), mincap );
+
+  // determine the total space for the H matrix
+
+  if (reaxc) {
+    inum = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+    numneigh = reaxc->list->numneigh;
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+  }
+
+  m = 0;
+  for( ii = 0; ii < inum; ii++ ) {
+    i = ilist[ii];
+    m += numneigh[i];
+  }
+  m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS );
+
+  H.n = n_cap;
+  H.m = m_cap;
+  memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr");
+  memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs");
+  memory->create(H.jlist,m_cap,"qeq:H.jlist");
+  memory->create(H.val,m_cap,"qeq:H.val");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::init()
+{
+  if (!atom->q_flag) error->all(FLERR,"Fix qeq/reax/omp requires atom attribute q");
+
+  ngroup = group->count(igroup);
+  if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms");
+
+  if (!force->pair_match("reax/c/omp",1))
+    error->all(FLERR,"Must use pair_style reax/c/omp with fix qeq/reax/omp");
+
+  // need a half neighbor list w/ Newton off and ghost neighbors
+  // built whenever re-neighboring occurs
+
+  int irequest = neighbor->request(this,instance_me);
+  neighbor->requests[irequest]->pair = 0;
+  neighbor->requests[irequest]->fix = 1;
+  neighbor->requests[irequest]->newton = 2;
+  neighbor->requests[irequest]->ghost = 1;
+
+  init_shielding();
+  init_taper();
+
+  if (strstr(update->integrate_style,"respa"))
+    nlevels_respa = ((Respa *) update->integrate)->nlevels;
+
+  // APSC setup
+  if(do_aspc) {
+    memory->create(aspc_b, aspc_order_max+2, "qeq/reax/aspc_b");
+
+    // Calculate damping factor
+    double o = double(aspc_order);
+    aspc_omega = (o+2.0) / (2*o+3.0);
+
+    // Calculate B coefficients
+    double c = (4.0 * o + 6.0) / (o + 3.0);
+    aspc_b[0] = c;
+
+    double n =  1.0;
+    double d =  4.0;
+    double s = -1.0;
+    double f =  2.0;
+
+    for(int i=1; i<aspc_order_max+2; i++ ) {
+      c*= (o + n) / (o + d);
+      aspc_b[i] = s * f * c;
+
+      s *= -1.0;
+      f += 1.0;
+      n -= 1.0;
+      d += 1.0;
+    }
+    
+    // fprintf(stdout,"aspc_omega= %f\n",aspc_omega);
+    // for(int i=0; i<aspc_order_max+2; i++) fprintf(stdout,"i= %i  B= %f\n",i,aspc_b[i]);
+    // error->all(FLERR,"Early Termination");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::compute_H()
+{
+  int inum, *ilist, *numneigh, **firstneigh;
+  double SMALL = 0.0001;
+
+  int *type = atom->type;
+  tagint * tag = atom->tag;
+  double **x = atom->x;
+  int *mask = atom->mask;
+
+  if(reaxc) {
+    inum = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+    numneigh = reaxc->list->numneigh;
+    firstneigh = reaxc->list->firstneigh;
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+  }
+  int ai, num_nbrs;
+  
+  // sumscan of the number of neighbors per atom to determine the offsets
+  // most likely, we are overallocating. desirable to work on this part
+  // to reduce the memory footprint of the far_nbrs list.
+  
+  num_nbrs = 0;
+  
+  for (int itr_i = 0; itr_i < inum; ++itr_i) {
+    ai = ilist[itr_i];
+    H.firstnbr[ai] = num_nbrs;
+    num_nbrs += numneigh[ai];
+  }
+  
+  // fill in the H matrix
+  
+#pragma omp parallel default(shared)
+  {
+    int i, j, ii, jj, mfill, jnum, flag;
+    int *jlist;
+    double dx, dy, dz, r_sqr;
+
+    mfill = 0;
+    
+    //#pragma omp for schedule(dynamic,50)
+#pragma omp for schedule(guided)
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if(mask[i] & groupbit) {
+	jlist = firstneigh[i];
+	jnum = numneigh[i];
+	mfill = H.firstnbr[i];
+	
+	for (jj = 0; jj < jnum; jj++) {
+	  j = jlist[jj];
+	  
+	  dx = x[j][0] - x[i][0];
+	  dy = x[j][1] - x[i][1];
+	  dz = x[j][2] - x[i][2];
+	  r_sqr = SQR(dx) + SQR(dy) + SQR(dz);
+	  
+	  flag = 0;
+	  if (r_sqr <= SQR(swb)) {
+	    if (j < n) flag = 1;
+	    else if (tag[i] < tag[j]) flag = 1;
+	    else if (tag[i] == tag[j]) {
+	      if (dz > SMALL) flag = 1;
+	      else if (fabs(dz) < SMALL) {
+		if (dy > SMALL) flag = 1;
+		else if (fabs(dy) < SMALL && dx > SMALL) flag = 1;
+	      }
+	    }
+	  }
+	  
+	  if( flag ) {
+	    H.jlist[mfill] = j;
+	    H.val[mfill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] );
+	    mfill++;
+	  }
+	}
+	
+	H.numnbrs[i] = mfill - H.firstnbr[i];
+      }
+    }
+
+    if (mfill >= H.m) {
+      char str[128];
+      sprintf(str,"H matrix size has been exceeded: mfill=%d H.m=%d\n",
+	      mfill, H.m );
+      error->warning(FLERR,str);
+      error->all(FLERR,"Fix qeq/reax/omp has insufficient QEq matrix size");
+    }
+  } // omp
+  
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::init_storage()
+{
+  int NN;
+  
+  if(reaxc) NN = reaxc->list->inum + reaxc->list->gnum;
+  else NN = list->inum + list->gnum;
+  
+#pragma omp parallel for schedule(static)
+  for (int i = 0; i < NN; i++) {
+    Hdia_inv[i] = 1. / eta[atom->type[i]];
+    b_s[i] = -chi[atom->type[i]];
+    b_t[i] = -1.0;
+    b_prc[i] = 0;
+    b_prm[i] = 0;
+    s[i] = t[i] = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::pre_force(int vflag)
+{
+
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase, funcstartTimeBase;
+  funcstartTimeBase = MPI_Wtime();
+#endif
+
+  double t_start, t_end;
+
+  if (update->ntimestep % nevery) return;
+  if( comm->me == 0 ) t_start = MPI_Wtime();
+
+  n = atom->nlocal;
+  N = atom->nlocal + atom->nghost;
+
+  // grow arrays if necessary
+  // need to be atom->nmax in length
+
+  if( atom->nmax > nmax ) reallocate_storage();
+  if( n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE )
+    reallocate_matrix();
+
+#ifdef OMP_TIMING
+  startTimeBase = MPI_Wtime();
+#endif
+
+  init_matvec();
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEINITMVINDEX] += (endTimeBase-startTimeBase);
+  startTimeBase = endTimeBase;
+#endif
+
+  if(dual_enabled) matvecs = dual_CG(b_s, b_t, s, t); // OMP_TIMING inside dual_CG
+  else {
+
+    matvecs_s = CG(b_s, s);    	// CG on s - parallel
+
+#ifdef OMP_TIMING
+    endTimeBase = MPI_Wtime();
+    ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase);
+    ompTimingCount[COMPUTECG1INDEX]++;
+    ompTimingCGCount[COMPUTECG1INDEX]+= matvecs_s;
+    startTimeBase = endTimeBase;
+#endif
+    
+    matvecs_t = CG(b_t, t); 	// CG on t - parallel
+    
+#ifdef OMP_TIMING
+    endTimeBase = MPI_Wtime();
+    ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase);
+    ompTimingCount[COMPUTECG2INDEX]++;
+    ompTimingCGCount[COMPUTECG2INDEX]+= matvecs_t;
+    startTimeBase = endTimeBase;
+#endif
+
+  } // if(dual_enabled)
+
+  //  if(comm->me == 0) fprintf(stdout,"matvecs= %i %i\n",matvecs_s,matvecs_t);
+
+#ifdef OMP_TIMING
+  startTimeBase = MPI_Wtime();
+#endif
+
+  calculate_Q();
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTECALCQINDEX] += (endTimeBase-startTimeBase);
+#endif
+
+  if( comm->me == 0 ) {
+    t_end = MPI_Wtime();
+    qeq_time = t_end - t_start;
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEQEQINDEX] += (endTimeBase-funcstartTimeBase);
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::init_matvec()
+{
+#ifdef OMP_TIMING
+  long endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  /* fill-in H matrix */
+  compute_H();
+
+  int nn,i;
+  int *ilist;
+
+  if(reaxc) {
+    nn = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+  } else {
+    nn = list->inum;
+    ilist = list->ilist;
+  }
+
+  // Should really be more careful with initialization and first (aspc_order+2) MD steps
+  if(do_aspc) {
+
+    double m_aspc_omega = 1.0 - aspc_omega;
+#pragma omp parallel for schedule(dynamic,50) private(i)
+    for(int ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if(atom->mask[i] & groupbit) {
+	
+	/* init pre-conditioner for H and init solution vectors */
+	Hdia_inv[i] = 1. / eta[ atom->type[i] ];
+	b_s[i]      = -chi[ atom->type[i] ];
+	b_t[i]      = -1.0;
+	
+	// Predictor Step
+	double tp = 0.0;
+	double sp = 0.0;
+	for(int j=0; j<aspc_order+2; j++) {
+	  tp+= aspc_b[j] * t_hist[i][j];
+	  sp+= aspc_b[j] * s_hist[i][j];
+	}
+
+	// Corrector Step
+	t[i] = aspc_omega * t_hist[i][0] + m_aspc_omega * tp;
+	s[i] = aspc_omega * s_hist[i][0] + m_aspc_omega * sp;
+      }
+    }
+
+  } else {
+
+#pragma omp parallel for schedule(dynamic,50) private(i)
+    for(int ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if(atom->mask[i] & groupbit) {
+	
+	/* init pre-conditioner for H and init solution vectors */
+	Hdia_inv[i] = 1. / eta[ atom->type[i] ];
+	b_s[i]      = -chi[ atom->type[i] ];
+	b_t[i]      = -1.0;
+	
+	/* linear extrapolation for s & t from previous solutions */
+	//s[i] = 2 * s_hist[i][0] - s_hist[i][1];
+	//t[i] = 2 * t_hist[i][0] - t_hist[i][1];
+	
+	/* quadratic extrapolation for s & t from previous solutions */
+	//s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] );
+	t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
+	
+	/* cubic extrapolation for s & t from previous solutions */
+	s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
+	//t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]);
+      }
+    }
+  }
+
+  pack_flag = 2;
+  comm->forward_comm_fix(this); //Dist_vector( s );
+  pack_flag = 3;
+  comm->forward_comm_fix(this); //Dist_vector( t );
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEMVCOMPINDEX] += (long) (endTimeBase-startTimeBase);
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixQEqReaxOMP::CG( double *b, double *x )
+{
+  int  i, ii, j, imax;
+  double tmp, alpha, beta, b_norm;
+  double sig_old, sig_new;
+
+  double my_buf[2], buf[2];
+
+  int nn, jj;
+  int *ilist;
+  if (reaxc) {
+    nn = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+  } else {
+    nn = list->inum;
+    ilist = list->ilist;
+  }
+
+  imax = 200;
+
+  pack_flag = 1;
+  sparse_matvec( &H, x, q );
+  comm->reverse_comm_fix( this ); //Coll_Vector( q );
+
+  double tmp1, tmp2;
+  tmp1 = tmp2 = 0.0;
+
+#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2)
+  for( jj = 0; jj < nn; ++jj ) {
+    i = ilist[jj];
+    if (atom->mask[i] & groupbit) {
+      r[i] = b[i] - q[i];
+      d[i] = r[i] * Hdia_inv[i]; //pre-condition
+      
+      tmp1 += b[i] * b[i];
+      tmp2 += r[i] * d[i];
+    }
+  }
+  
+  my_buf[0] = tmp1;
+  my_buf[1] = tmp2;
+
+  MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
+
+  b_norm = sqrt(buf[0]);
+  sig_new = buf[1];
+
+  for( i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i ) {
+    comm->forward_comm_fix(this); //Dist_vector( d );
+    sparse_matvec( &H, d, q );
+    comm->reverse_comm_fix(this); //Coll_vector( q );
+
+    tmp1 = 0.0;
+#pragma omp parallel
+    {
+      
+#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1)
+      for( jj = 0; jj < nn; jj++) {
+	ii = ilist[jj];
+	if(atom->mask[ii] & groupbit) tmp1 += d[ii] * q[ii];
+      }
+
+#pragma omp barrier
+#pragma omp master
+      {
+	MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world);
+	
+	alpha = sig_new / tmp2;
+	tmp1 = 0.0;
+      }
+
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1)
+      for( jj = 0; jj < nn; jj++) {
+	ii = ilist[jj];
+	if(atom->mask[ii] & groupbit) {
+	  x[ii] += alpha * d[ii];
+	  r[ii] -= alpha * q[ii];
+	  
+	  // pre-conditioning
+	  p[ii] = r[ii] * Hdia_inv[ii];
+	  tmp1 += r[ii] * p[ii];
+	}
+      }
+    } // omp parallel
+
+    sig_old = sig_new;
+    
+    MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world);
+    
+    sig_new = tmp2;
+    beta = sig_new / sig_old;
+      
+#pragma omp for schedule(dynamic,50) private(ii)
+    for( jj = 0; jj < nn; jj++) {
+      ii = ilist[jj];
+      if(atom->mask[ii] & groupbit) d[ii] = p[ii] + beta * d[ii];
+    }
+  }
+
+  if (i >= imax && comm->me == 0) {
+    char str[128];
+    sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
+            "at " BIGINT_FORMAT " step",i,update->ntimestep);
+    error->warning(FLERR,str);
+  }
+
+  return i;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b )
+{
+#pragma omp parallel default(shared)
+  {
+    int i, j, itr_j;
+    int nn, NN, ii;
+    int *ilist;
+    int nthreads = comm->nthreads;
+    int tid = omp_get_thread_num();
+    
+    if(reaxc) {
+      nn = reaxc->list->inum;
+      NN = reaxc->list->inum + reaxc->list->gnum;
+      ilist = reaxc->list->ilist;
+    } else {
+      nn = list->inum;
+      NN = list->inum + list->gnum;
+      ilist = list->ilist;
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for (ii = 0; ii < nn; ++ii) {
+      i = ilist[ii];
+      if(atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i];
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for (ii = nn; ii < NN; ++ii) {
+      i = ilist[ii];
+      if(atom->mask[i] & groupbit) b[i] = 0;
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i) 
+      for(int t=0; t<nthreads; t++) b_temp[t][i] = 0.0;
+
+    // Wait for b accumulated and b_temp zeroed.
+#pragma omp barrier
+
+#pragma omp for schedule(dynamic,50)
+    for (ii = 0; ii < nn; ++ii) {
+      i = ilist[ii];
+      if(atom->mask[i] & groupbit) {
+	for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
+	  j = A->jlist[itr_j];
+	  b[i] += A->val[itr_j] * x[j];
+
+	  b_temp[tid][j] += A->val[itr_j] * x[i];
+	}
+      }
+    }
+
+    // Wait till b_temp accumulated
+#pragma omp barrier
+
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i)
+      for (int t = 0; t < nthreads; ++t) b[i] += b_temp[t][i];
+    
+  } //end omp parallel
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::calculate_Q()
+{
+  int i;
+  double *q = atom->q;
+
+  int nn;
+  int *ilist;
+
+  if (reaxc) {
+    nn = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+  } else {
+    nn = list->inum;
+    ilist = list->ilist;
+  }
+
+  double tmp1, tmp2;
+  tmp1 = tmp2 = 0.0;
+#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2)
+  for(int ii = 0; ii < nn; ii++) {
+    i = ilist[ii];
+    if(atom->mask[i] & groupbit) {
+      tmp1 += s[i];
+      tmp2 += t[i];
+    }
+  }
+
+  double my_buf[2], buf[2];
+  buf[0] = 0.0;
+  buf[1] = 0.0;
+
+  my_buf[0] = tmp1;
+  my_buf[1] = tmp2;
+
+  MPI_Allreduce(&my_buf,&buf,2,MPI_DOUBLE,MPI_SUM,world);
+
+  double u = buf[0] / buf[1];
+
+#pragma omp parallel for schedule(static) private(i)
+  for (int ii = 0; ii < nn; ++ii) {
+    i = ilist[ii];
+    if(atom->mask[i] & groupbit) {
+      q[i] = s[i] - u * t[i];
+ 
+      // backup s & t
+      for (int k = 4; k > 0; --k) {
+	s_hist[i][k] = s_hist[i][k-1];
+	t_hist[i][k] = t_hist[i][k-1];
+      }
+      s_hist[i][0] = s[i];
+      t_hist[i][0] = t[i];
+    }
+  }
+
+  pack_flag = 4;
+  comm->forward_comm_fix( this ); //Dist_vector( atom->q );
+}
+
+/* ---------------------------------------------------------------------- */
+
+// double FixQEqReaxOMP::parallel_norm( double *v, int n )
+// {
+//   int i;
+//   double my_sum, norm_sqr;  
+
+//   int *ilist;
+
+//   if (reaxc) ilist = reaxc->list->ilist;
+//   else ilist = list->ilist;
+
+//   my_sum = 0.0;
+//   norm_sqr = 0.0;
+
+// #pragma omp parallel for schedule(static) private(i) reduction(+:my_sum)
+//   for (int ii = 0; ii < n; ++ii) {
+//     i = ilist[ii];
+//     if(atom->mask[i] & groupbit) my_sum += SQR( v[i] );
+//   }
+
+//   MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
+
+//   return sqrt( norm_sqr );
+// }
+
+// /* ---------------------------------------------------------------------- */
+
+// double FixQEqReaxOMP::parallel_dot( double *v1, double *v2, int n )
+// {
+//   int  i;
+//   double my_dot, res;
+
+//   int *ilist;
+
+//   if (reaxc) ilist = reaxc->list->ilist;
+//   else ilist = list->ilist;
+
+//   my_dot = 0.0;
+//   res = 0.0;
+
+// #pragma omp parallel for schedule(static) private(i) reduction(+:my_dot)
+//   for (int ii = 0; ii < n; ++ii) {
+//     i = ilist[ii];
+//     if(atom->mask[i] & groupbit) my_dot += v1[i] * v2[i];
+//   }
+
+//   MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world );
+
+//   return res;
+// }
+
+// /* ---------------------------------------------------------------------- */
+
+// double FixQEqReaxOMP::parallel_vector_acc( double *v, int n )
+// {
+//   int  i;
+//   double my_acc, res;
+
+//   int *ilist;
+
+//   if (reaxc) ilist = reaxc->list->ilist;
+//   else ilist = list->ilist;
+
+//   my_acc = 0.0;
+//   res = 0.0;
+
+// #pragma omp parallel for schedule(static) private(i) reduction(+:my_acc)
+//   for (int ii = 0; ii < n; ++ii) {
+//     i = ilist[ii];
+//     if(atom->mask[i] & groupbit) my_acc += v[i];
+//   }
+
+//   MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world );
+
+//   return res;
+// }
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::vector_sum( double* dest, double c, double* v,
+                                double d, double* y, int k )
+{
+  int i;
+  int *ilist;
+
+  if (reaxc) ilist = reaxc->list->ilist;
+  else ilist = list->ilist;
+
+#pragma omp parallel for schedule(static) private(i)
+  for (int ii=0; ii<k; ii++) {
+    i = ilist[ii];
+    if(atom->mask[i] & groupbit) dest[i] = c * v[i] + d * y[i];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::vector_add( double* dest, double c, double* v, int k )
+{  
+  int i;
+  int *ilist;
+
+  if (reaxc) ilist = reaxc->list->ilist;
+  else ilist = list->ilist;
+
+#pragma omp parallel for schedule(static) private(i)
+  for (int ii=0; ii<k; ii++) {
+    i = ilist[ii];
+    if(atom->mask[i] & groupbit) dest[i] += c * v[i];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ---------------------------------------------------------------------- */
+/* dual CG support                                                        */
+/* ---------------------------------------------------------------------- */
+
+int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 )
+{
+
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  int  i, j, imax;
+  double tmp, alpha_s, alpha_t, beta_s, beta_t, b_norm_s, b_norm_t;
+  double sig_old_s, sig_old_t, sig_new_s, sig_new_t;
+
+  double my_buf[4], buf[4];
+
+  int nn, ii, jj;
+  int *ilist;
+  if (reaxc) {
+    nn = reaxc->list->inum;
+    ilist = reaxc->list->ilist;
+  } else {
+    nn = list->inum;
+    ilist = list->ilist;
+  }
+
+  imax = 200;
+
+  pack_flag = 5; // forward 2x d and reverse 2x q
+  dual_sparse_matvec( &H, x1, x2, q );
+  comm->reverse_comm_fix( this ); //Coll_Vector( q );
+
+  double tmp1, tmp2, tmp3, tmp4;
+  tmp1 = tmp2 = tmp3 = tmp4 = 0.0;
+
+#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2,tmp3,tmp4)
+  for( jj = 0; jj < nn; ++jj ) {
+    i = ilist[jj];
+    if (atom->mask[i] & groupbit) {
+      int indxI = 2 * i;
+      r[indxI  ] = b1[i] - q[indxI  ];
+      r[indxI+1] = b2[i] - q[indxI+1];
+      
+      d[indxI  ] = r[indxI  ] * Hdia_inv[i]; //pre-condition
+      d[indxI+1] = r[indxI+1] * Hdia_inv[i];
+      
+      tmp1 += b1[i] * b1[i];
+      tmp2 += b2[i] * b2[i];
+      
+      tmp3 += r[indxI  ] * d[indxI  ];
+      tmp4 += r[indxI+1] * d[indxI+1];
+    }
+  }
+  
+  my_buf[0] = tmp1;
+  my_buf[1] = tmp2;
+  my_buf[2] = tmp3;
+  my_buf[3] = tmp4;
+
+  MPI_Allreduce(&my_buf, &buf, 4, MPI_DOUBLE, MPI_SUM, world);
+
+  b_norm_s = sqrt(buf[0]);
+  b_norm_t = sqrt(buf[1]);
+
+  sig_new_s = buf[2];
+  sig_new_t = buf[3];
+
+  for( i = 1; i < imax; ++i ) {
+    comm->forward_comm_fix(this); //Dist_vector( d );
+    dual_sparse_matvec( &H, d, q );
+    comm->reverse_comm_fix(this); //Coll_vector( q );
+
+    tmp1 = tmp2 = 0.0;
+#pragma omp parallel
+    {
+    
+#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2)
+      for( jj = 0; jj < nn; jj++) {
+	ii = ilist[jj];
+	if(atom->mask[ii] & groupbit) {
+	  int indxI = 2 * ii;
+	  tmp1 += d[indxI  ] * q[indxI  ];
+	  tmp2 += d[indxI+1] * q[indxI+1];
+	}
+      }
+
+#pragma omp barrier
+#pragma omp master
+      {
+	my_buf[0] = tmp1;
+	my_buf[1] = tmp2;
+	
+	MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
+	
+	alpha_s = sig_new_s / buf[0];
+	alpha_t = sig_new_t / buf[1];
+
+	tmp1 = tmp2 = 0.0;
+      }
+
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2)
+      for( jj = 0; jj < nn; jj++) {
+	ii = ilist[jj];
+	if(atom->mask[ii] & groupbit) {
+	  int indxI = 2 * ii;
+	  x1[ii] += alpha_s * d[indxI  ];
+	  x2[ii] += alpha_t * d[indxI+1];
+	  
+	  r[indxI  ] -= alpha_s * q[indxI  ];
+	  r[indxI+1] -= alpha_t * q[indxI+1];
+	  
+	  // pre-conditioning
+	  p[indxI  ] = r[indxI  ] * Hdia_inv[ii];
+	  p[indxI+1] = r[indxI+1] * Hdia_inv[ii];
+	  
+	  tmp1 += r[indxI  ] * p[indxI  ];
+	  tmp2 += r[indxI+1] * p[indxI+1];
+	}
+      }
+    } // omp parallel
+
+    my_buf[0] = tmp1;
+    my_buf[1] = tmp2;
+    
+    sig_old_s = sig_new_s;
+    sig_old_t = sig_new_t;
+    
+    MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
+    
+    sig_new_s = buf[0];
+    sig_new_t = buf[1];
+
+    if( sqrt(sig_new_s)/b_norm_s <= tolerance || sqrt(sig_new_t)/b_norm_t <= tolerance) break;
+      
+    beta_s = sig_new_s / sig_old_s;
+    beta_t = sig_new_t / sig_old_t;
+      
+#pragma omp for schedule(dynamic,50) private(ii)
+    for( jj = 0; jj < nn; jj++) {
+      ii = ilist[jj];
+      if(atom->mask[ii] & groupbit) {
+	int indxI = 2 * ii;
+	
+	d[indxI  ] = p[indxI  ] + beta_s * d[indxI  ];
+	d[indxI+1] = p[indxI+1] + beta_t * d[indxI+1];
+      }
+    }
+  }
+
+  i++;
+  matvecs_s = matvecs_t = i; // The plus one makes consistent with count from CG()
+  matvecs = i;
+  
+  // Timing info for iterating s&t together
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase);
+  ompTimingCount[COMPUTECG1INDEX]++;
+  ompTimingCGCount[COMPUTECG1INDEX]+= i;
+  startTimeBase = endTimeBase;
+#endif
+
+  // If necessary, converge other system
+  if(sqrt(sig_new_s)/b_norm_s > tolerance) {
+    pack_flag = 2;
+    comm->forward_comm_fix(this); // x1 => s
+
+    i+= CG(b1, x1);
+    matvecs_s = i;
+  }
+  else if(sqrt(sig_new_t)/b_norm_t > tolerance) {
+    pack_flag = 3;
+    comm->forward_comm_fix(this); // x2 => t
+
+    i+= CG(b2, x2);
+    matvecs_t = i;
+  }
+
+  // Timing info for remainder of s or t
+#ifdef OMP_TIMING
+    endTimeBase = MPI_Wtime();
+    ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase);
+    ompTimingCount[COMPUTECG2INDEX]++;
+    ompTimingCGCount[COMPUTECG2INDEX]+= i - matvecs;
+    startTimeBase = endTimeBase;
+#endif
+
+  if ( i >= imax && comm->me == 0) {
+    char str[128];
+    sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
+            "at " BIGINT_FORMAT " step",i,update->ntimestep);
+    error->warning(FLERR,str);
+  }
+
+  return i;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2, double *b )
+{
+#pragma omp parallel default(shared)
+  {
+    int i, j, itr_j;
+    int nn, NN, ii;
+    int *ilist;
+    int indxI, indxJ;
+
+    int nthreads = comm->nthreads;
+    int tid = omp_get_thread_num();
+    
+    if (reaxc) {
+      nn = reaxc->list->inum;
+      NN = reaxc->list->inum + reaxc->list->gnum;
+      ilist = reaxc->list->ilist;
+    } else {
+      nn = list->inum;
+      NN = list->inum + list->gnum;
+      ilist = list->ilist;
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for( ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	b[indxI  ] = eta[ atom->type[i] ] * x1[i];
+	b[indxI+1] = eta[ atom->type[i] ] * x2[i];
+      }
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for( ii = nn; ii < NN; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	b[indxI]   = 0;
+	b[indxI+1] = 0;
+      }
+    }
+
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i) {
+      indxI = 2 * i;
+      for(int t=0; t<nthreads; t++) {
+	b_temp[t][indxI  ] = 0.0;
+	b_temp[t][indxI+1] = 0.0;
+      }
+    }
+    
+    // Wait for b accumulated and b_temp zeroed
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50)
+    for( ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	for( itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
+	  j = A->jlist[itr_j];
+	  indxJ = 2 * j;
+	  b[indxI  ] += A->val[itr_j] * x1[j];
+	  b[indxI+1] += A->val[itr_j] * x2[j];
+
+	  b_temp[tid][indxJ  ] += A->val[itr_j] * x1[i];
+	  b_temp[tid][indxJ+1] += A->val[itr_j] * x2[i];
+	}
+      }
+    }
+
+    // Wait till b_temp accumulated
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i) {
+      indxI = 2 * i;
+      for (int t = 0; t < nthreads; ++t) {
+	b[indxI  ] += b_temp[t][indxI  ];
+	b[indxI+1] += b_temp[t][indxI+1];
+      }
+    }
+
+  } // omp parallel
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b )
+{
+#pragma omp parallel default(shared)
+  {
+    int i, j, itr_j;
+    int nn, NN, ii;
+    int *ilist;
+    int indxI, indxJ;
+
+    int nthreads = comm->nthreads;
+    int tid = omp_get_thread_num();
+    
+    if (reaxc) {
+      nn = reaxc->list->inum;
+      NN = reaxc->list->inum + reaxc->list->gnum;
+      ilist = reaxc->list->ilist;
+    } else {
+      nn = list->inum;
+      NN = list->inum + list->gnum;
+      ilist = list->ilist;
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for( ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	b[indxI  ] = eta[ atom->type[i] ] * x[indxI  ];
+	b[indxI+1] = eta[ atom->type[i] ] * x[indxI+1];
+      }
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for( ii = nn; ii < NN; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	b[indxI]   = 0;
+	b[indxI+1] = 0;
+      }
+    }
+
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i) {
+      indxI = 2 * i;
+      for(int t=0; t<nthreads; t++) {
+	b_temp[t][indxI  ] = 0.0;
+	b_temp[t][indxI+1] = 0.0;
+      }
+    }
+    
+    // Wait for b accumulated and b_temp zeroed
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50)
+    for( ii = 0; ii < nn; ++ii ) {
+      i = ilist[ii];
+      if (atom->mask[i] & groupbit) {
+	indxI = 2 * i;
+	for( itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
+	  j = A->jlist[itr_j];
+	  indxJ = 2 * j;
+	  b[indxI  ] += A->val[itr_j] * x[indxJ  ];
+	  b[indxI+1] += A->val[itr_j] * x[indxJ+1];
+
+	  b_temp[tid][indxJ  ] += A->val[itr_j] * x[indxI  ];
+	  b_temp[tid][indxJ+1] += A->val[itr_j] * x[indxI+1];
+	}
+      }
+    }
+
+    // Wait till b_temp accumulated
+#pragma omp barrier
+#pragma omp for schedule(dynamic,50)
+    for (i = 0; i < NN; ++i) {
+      indxI = 2 * i;
+      for (int t = 0; t < nthreads; ++t) {
+	b[indxI  ] += b_temp[t][indxI  ];
+	b[indxI+1] += b_temp[t][indxI+1];
+      }
+    }
+
+  } // omp parallel
+}
diff --git a/src/USER-OMP/fix_qeq_reax_omp.h b/src/USER-OMP/fix_qeq_reax_omp.h
new file mode 100644
index 0000000000..c06185ca2e
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_reax_omp.h
@@ -0,0 +1,84 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Hasan Metin Aktulga, Purdue University
+   (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
+
+   Please cite the related publication:
+   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+   "Parallel Reactive Molecular Dynamics: Numerical Methods and
+   Algorithmic Techniques", Parallel Computing, in press.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(qeq/reax/omp,FixQEqReaxOMP)
+
+#else
+
+#ifndef LMP_FIX_QEQ_REAX_OMP_H
+#define LMP_FIX_QEQ_REAX_OMP_H
+
+#include "fix_qeq_reax.h"
+
+namespace LAMMPS_NS {
+
+class FixQEqReaxOMP : public FixQEqReax {
+
+ public:
+  FixQEqReaxOMP(class LAMMPS *, int, char **);
+  ~FixQEqReaxOMP();
+  virtual void init();
+  virtual void init_storage();
+  virtual void pre_force(int);
+  virtual void post_constructor();
+  
+ protected:
+  double **b_temp;
+
+  class PairReaxCOMP *reaxc;
+
+  int do_aspc;
+  int aspc_order, aspc_order_max;
+  double aspc_omega;
+  double * aspc_b;
+  
+  virtual void pertype_parameters(char*);
+  virtual void allocate_storage();
+  virtual void deallocate_storage();
+  virtual void allocate_matrix();
+  virtual void init_matvec();
+  virtual void compute_H();
+
+  virtual int CG(double*,double*);
+  virtual void sparse_matvec(sparse_matrix*,double*,double*);
+  virtual void calculate_Q();
+  
+  /* virtual double parallel_norm( double*, int ); */
+  /* virtual double parallel_dot( double*, double*, int ); */
+  /* virtual double parallel_vector_acc( double*, int ); */
+
+  virtual void vector_sum(double*,double,double*,double,double*,int);
+  virtual void vector_add(double*, double, double*,int);
+
+  // dual CG support  
+  virtual int dual_CG(double*,double*,double*,double*);
+  virtual void dual_sparse_matvec(sparse_matrix*,double*,double*,double*);
+  virtual void dual_sparse_matvec(sparse_matrix*,double*,double*);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_reaxc_species_omp.cpp b/src/USER-OMP/fix_reaxc_species_omp.cpp
new file mode 100644
index 0000000000..e9ac388252
--- /dev/null
+++ b/src/USER-OMP/fix_reaxc_species_omp.cpp
@@ -0,0 +1,83 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Ray Shan (Sandia, tnshan@sandia.gov)
+   			 Oleg Sergeev (VNIIA, sergeev@vniia.ru)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "stdlib.h"
+#include "math.h"
+#include "atom.h"
+#include "string.h"
+#include "fix_ave_atom.h"
+#include "fix_reaxc_species_omp.h"
+#include "domain.h"
+#include "update.h"
+#include "pair_reaxc_omp.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "comm.h"
+#include "force.h"
+#include "compute.h"
+#include "input.h"
+#include "variable.h"
+#include "memory.h"
+#include "error.h"
+#include "reaxc_list.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+FixReaxCSpeciesOMP::FixReaxCSpeciesOMP(LAMMPS *lmp, int narg, char **arg) :
+  FixReaxCSpecies(lmp, narg, arg)
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixReaxCSpeciesOMP::init()
+{
+  if (atom->tag_enable == 0)
+    error->all(FLERR,"Cannot use fix reax/c/species unless atoms have IDs");
+
+  reaxc = (PairReaxCOMP *) force->pair_match("reax/c/omp",1);
+  if (reaxc == NULL) error->all(FLERR,"Cannot use fix reax/c/species/omp without "
+		  "pair_style reax/c/omp");
+
+  reaxc->fixspecies_flag = 1;
+  nvalid = update->ntimestep+nfreq;
+
+  // check if this fix has been called twice
+  int count = 0;
+  for (int i = 0; i < modify->nfix; i++)
+    if (strcmp(modify->fix[i]->style,"reax/c/species/omp") == 0) count++;
+  if (count > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one fix reax/c/species");
+
+  if (!setupflag) {
+    // create a compute to store properties
+    create_compute();
+
+    // create a fix to point to fix_ave_atom for averaging stored properties
+    create_fix();
+
+    setupflag = 1;
+  }
+
+}
diff --git a/src/USER-OMP/fix_reaxc_species_omp.h b/src/USER-OMP/fix_reaxc_species_omp.h
new file mode 100644
index 0000000000..006bb3e87a
--- /dev/null
+++ b/src/USER-OMP/fix_reaxc_species_omp.h
@@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(reax/c/species/omp,FixReaxCSpeciesOMP)
+
+#else
+
+#ifndef LMP_FIX_REAXC_SPECIES_OMP_H
+#define LMP_FIX_REAXC_SPECIES_OMP_H
+
+#include "pair_reaxc_omp.h"
+#include "fix_reaxc_species.h"
+
+#define BUFLEN 1000
+
+namespace LAMMPS_NS {
+  
+  class FixReaxCSpeciesOMP : public FixReaxCSpecies {
+    
+  public:
+    FixReaxCSpeciesOMP(class LAMMPS *, int, char **);
+    ~FixReaxCSpeciesOMP(){};
+    virtual void init();
+    
+  private:
+    class PairReaxCOMP *reaxc;
+    
+  };
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_reaxc_omp.cpp b/src/USER-OMP/pair_reaxc_omp.cpp
new file mode 100644
index 0000000000..c7a6c27f48
--- /dev/null
+++ b/src/USER-OMP/pair_reaxc_omp.cpp
@@ -0,0 +1,584 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Hasan Metin Aktulga, Purdue University
+   (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
+   Per-atom energy/virial added by Ray Shan (Sandia)
+   Fix reax/c/bonds and fix reax/c/species for pair_style reax/c added by
+   	Ray Shan (Sandia)
+------------------------------------------------------------------------- */
+
+#include "pair_reaxc_omp.h"
+#include "atom.h"
+#include "update.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "modify.h"
+#include "fix.h"
+#include "fix_reaxc.h"
+#include "citeme.h"
+#include "memory.h"
+#include "error.h"
+
+#include "reaxc_types.h"
+#include "reaxc_allocate.h"
+#include "reaxc_control.h"
+#include "reaxc_ffield.h"
+#include "reaxc_forces_omp.h"
+#include "reaxc_init_md_omp.h"
+#include "reaxc_io_tools.h"
+#include "reaxc_list.h"
+#include "reaxc_lookup.h"
+#include "reaxc_reset_tools.h"
+#include "reaxc_tool_box.h"
+#include "reaxc_traj.h"
+#include "reaxc_vector.h"
+#include "fix_reaxc_bonds.h"
+
+using namespace LAMMPS_NS;
+
+#ifdef OMP_TIMING
+double ompTimingData[LASTTIMINGINDEX];
+int ompTimingCount[LASTTIMINGINDEX];
+int ompTimingCGCount[LASTTIMINGINDEX];
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PairReaxCOMP::PairReaxCOMP(LAMMPS *lmp) : PairReaxC(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  system->pair_ptr = this;
+
+  num_nbrs_offset = NULL;
+
+#ifdef OMP_TIMING
+  for (int i=0;i<LASTTIMINGINDEX;i++) {
+    ompTimingData[i] = 0;
+    ompTimingCount[i] = 0;
+    ompTimingCGCount[i] = 0;
+  }
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairReaxCOMP::~PairReaxCOMP()
+{
+  reax_list * bonds = lists+BONDS;
+  for (int i=0; i<bonds->num_intrs; ++i)
+    sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction");
+
+  memory->destroy(num_nbrs_offset);
+
+#ifdef OMP_TIMING
+  int myrank;
+
+  MPI_Comm_rank(mpi_data->world,&myrank);
+  
+  // Write screen output
+  if (myrank == 0 && screen) {
+    fprintf(screen,"\n\nWrite_Lists    took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]);
+
+    fprintf(screen,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]);
+    fprintf(screen,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]);
+    fprintf(screen,"\n ->Bond Order:     %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]);
+    fprintf(screen,"\n ->Atom Energy:    %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]);
+    fprintf(screen,"\n ->Bond:           %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); 
+    fprintf(screen,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]);
+    fprintf(screen,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]);
+    fprintf(screen,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]);
+    fprintf(screen,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); 
+    fprintf(screen,"\n ->Total Forces:   %11.3lf seconds", ompTimingData[COMPUTETFINDEX]);
+
+    fprintf(screen,"\n\nfixQEQ:          %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]);
+    fprintf(screen,"\n ->QEQ init:       %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]);
+
+    double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]);
+    fprintf(screen,"\n ->QEQ CG1:        %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg);
+
+    avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]);
+    fprintf(screen,"\n ->QEQ CG2:        %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg);
+    fprintf(screen,"\n ->QEQ CalcQ:      %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]);
+  }
+
+  // Write logfile output
+  if (myrank == 0 && logfile) {
+    fprintf(logfile,"\n\nWrite_Lists    took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]);
+
+    fprintf(logfile,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]);
+    fprintf(logfile,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]);
+    fprintf(logfile,"\n ->Bond Order:     %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]);
+    fprintf(logfile,"\n ->Atom Energy:    %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]);
+    fprintf(logfile,"\n ->Bond:           %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); 
+    fprintf(logfile,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]);
+    fprintf(logfile,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]);
+    fprintf(logfile,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]);
+    fprintf(logfile,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); 
+    fprintf(logfile,"\n ->Total Forces:   %11.3lf seconds", ompTimingData[COMPUTETFINDEX]);
+
+    fprintf(logfile,"\n\nfixQEQ:          %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]);
+    fprintf(logfile,"\n ->QEQ init:       %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]);
+
+    double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]);
+    fprintf(logfile,"\n ->QEQ CG1:        %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg);
+
+    avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]);
+    fprintf(logfile,"\n ->QEQ CG2:        %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg);
+    fprintf(logfile,"\n ->QEQ CalcQ:      %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]);
+  }
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::compute(int eflag, int vflag)
+{
+  double evdwl,ecoul;
+  double t_start, t_end;
+  
+  // communicate num_bonds once every reneighboring
+  // 2 num arrays stored by fix, grab ptr to them
+  
+  if (neighbor->ago == 0) comm->forward_comm_fix(fix_reax);
+  int *num_bonds = fix_reax->num_bonds;
+  int *num_hbonds = fix_reax->num_hbonds;
+  
+  evdwl = ecoul = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else ev_unset();
+  
+  if (vflag_global) control->virial = 1;
+  else control->virial = 0;
+  
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
+  
+  system->big_box.V = 0;
+  system->big_box.box_norms[0] = 0;
+  system->big_box.box_norms[1] = 0;
+  system->big_box.box_norms[2] = 0;
+  if( comm->me == 0 ) t_start = MPI_Wtime();
+  // setup data structures
+  
+  setup();
+  
+  Reset( system, control, data, workspace, &lists, world );
+  
+  // Why not update workspace like in MPI-only code?
+  // Using the MPI-only way messes up the hb energy 
+  //workspace->realloc.num_far = write_reax_lists();
+  write_reax_lists();
+  
+  // timing for filling in the reax lists
+  if( comm->me == 0 ) {
+    t_end = MPI_Wtime();
+    data->timing.nbrs = t_end - t_start;
+  }
+
+  // forces
+  
+#ifdef OMP_TIMING
+  double startTimeBase,endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  Compute_ForcesOMP(system,control,data,workspace,&lists,out_control,mpi_data);
+  read_reax_forces(vflag);
+  
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEINDEX] += (endTimeBase-startTimeBase);
+#endif
+  
+#pragma omp parallel for schedule(static)
+  for(int k = 0; k < system->N; ++k) {
+    num_bonds[k] = system->my_atoms[k].num_bonds;
+    num_hbonds[k] = system->my_atoms[k].num_hbonds;
+  }
+  
+  // energies and pressure
+  
+  if (eflag_global) {
+    evdwl += data->my_en.e_bond;
+    evdwl += data->my_en.e_ov;
+    evdwl += data->my_en.e_un;
+    evdwl += data->my_en.e_lp;
+    evdwl += data->my_en.e_ang;
+    evdwl += data->my_en.e_pen;
+    evdwl += data->my_en.e_coa;
+    evdwl += data->my_en.e_hb;
+    evdwl += data->my_en.e_tor;
+    evdwl += data->my_en.e_con;
+    evdwl += data->my_en.e_vdW;
+    
+    ecoul += data->my_en.e_ele;
+    ecoul += data->my_en.e_pol;
+    
+    // Store the different parts of the energy
+    // in a list for output by compute pair command
+    
+    pvector[0] = data->my_en.e_bond;
+    pvector[1] = data->my_en.e_ov + data->my_en.e_un;
+    pvector[2] = data->my_en.e_lp;
+    pvector[3] = 0.0;
+    pvector[4] = data->my_en.e_ang;
+    pvector[5] = data->my_en.e_pen;
+    pvector[6] = data->my_en.e_coa;
+    pvector[7] = data->my_en.e_hb;
+    pvector[8] = data->my_en.e_tor;
+    pvector[9] = data->my_en.e_con;
+    pvector[10] = data->my_en.e_vdW;
+    pvector[11] = data->my_en.e_ele;
+    pvector[12] = 0.0;
+    pvector[13] = data->my_en.e_pol;
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+  
+// Set internal timestep counter to that of LAMMPS
+  
+  data->step = update->ntimestep;
+
+  Output_Results( system, control, data, &lists, out_control, mpi_data );
+  
+  // populate tmpid and tmpbo arrays for fix reax/c/species
+  int i, j;
+  
+  if(fixspecies_flag) {
+    if (system->N > nmax) {
+      memory->destroy(tmpid);
+      memory->destroy(tmpbo);
+      nmax = system->N;
+      memory->create(tmpid,nmax,MAXSPECBOND,"pair:tmpid");
+      memory->create(tmpbo,nmax,MAXSPECBOND,"pair:tmpbo");
+    }
+    
+#pragma omp parallel for collapse(2) schedule(static) default(shared)
+    for (i = 0; i < system->N; i ++)
+      for (j = 0; j < MAXSPECBOND; j ++) {
+        tmpbo[i][j] = 0.0;
+	tmpid[i][j] = 0;
+      }
+    
+    FindBond();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::init_style( )
+{
+  if (!atom->q_flag) 
+    error->all(FLERR,"Pair reax/c/omp requires atom attribute q");
+
+  // firstwarn = 1;
+
+  int iqeq = modify->find_fix_by_style("qeq/reax/omp");
+  if (iqeq < 0 && qeqflag == 1)
+    error->all(FLERR,"Pair reax/c/omp requires use of fix qeq/reax/omp");
+
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
+  system->wsize = comm->nprocs;
+
+  system->big_box.V = 0;
+  system->big_box.box_norms[0] = 0;
+  system->big_box.box_norms[1] = 0;
+  system->big_box.box_norms[2] = 0;
+
+  if (atom->tag_enable == 0)
+    error->all(FLERR,"Pair style reax/c/omp requires atom IDs");
+  if (force->newton_pair == 0)
+    error->all(FLERR,"Pair style reax/c/omp requires newton pair on");
+
+  // need a half neighbor list w/ Newton off and ghost neighbors
+  // built whenever re-neighboring occurs
+
+  int irequest = neighbor->request(this,instance_me);
+  neighbor->requests[irequest]->newton = 2;
+  neighbor->requests[irequest]->ghost = 1;
+
+  cutmax = MAX3(control->nonb_cut, control->hbond_cut, 2*control->bond_cut);
+
+  for( int i = 0; i < LIST_N; ++i )
+    lists[i].allocated = 0;
+
+  if (fix_reax == NULL) {
+    char **fixarg = new char*[3];
+    fixarg[0] = (char *) "REAXC";
+    fixarg[1] = (char *) "all";
+    fixarg[2] = (char *) "REAXC";
+    modify->add_fix(3,fixarg);
+    delete [] fixarg;
+    fix_reax = (FixReaxC *) modify->fix[modify->nfix-1];
+  }
+
+#pragma omp parallel 
+  { control->nthreads = omp_get_num_threads(); }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::setup( )
+{
+  int oldN;
+  int mincap = system->mincap;
+  double safezone = system->safezone;
+
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  oldN = system->N;
+  system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
+
+  if (system->N > nmax) {
+    memory->destroy(num_nbrs_offset);
+    // Don't update nmax here. It is updated at end of compute().
+    memory->create(num_nbrs_offset, system->N, "pair:num_nbrs_offset");
+  }
+
+  if (setup_flag == 0) {
+
+    setup_flag = 1;
+
+    int *num_bonds = fix_reax->num_bonds;
+    int *num_hbonds = fix_reax->num_hbonds;
+
+    control->vlist_cut = neighbor->cutneighmax;
+
+    // determine the local and total capacity
+
+    system->local_cap = MAX( (int)(system->n * safezone), mincap );
+    system->total_cap = MAX( (int)(system->N * safezone), mincap );
+
+    // initialize my data structures
+
+    PreAllocate_Space( system, control, workspace, world );
+    write_reax_atoms();
+
+    int num_nbrs = estimate_reax_lists();
+    if(!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR,
+                  lists+FAR_NBRS, world))
+      error->all(FLERR,"Pair reax/c problem in far neighbor list");
+
+    write_reax_lists();
+
+    InitializeOMP( system, control, data, workspace, &lists, out_control,
+                mpi_data, world );
+
+    for( int k = 0; k < system->N; ++k ) {
+      num_bonds[k] = system->my_atoms[k].num_bonds;
+      num_hbonds[k] = system->my_atoms[k].num_hbonds;
+    }
+
+  } else {
+
+    // fill in reax datastructures
+
+    write_reax_atoms();
+
+    // reset the bond list info for new atoms
+
+    for(int k = oldN; k < system->N; ++k)
+      Set_End_Index( k, Start_Index( k, lists+BONDS ), lists+BONDS );
+    
+    // estimate far neighbor list size
+    // Not present in MPI-only version
+    workspace->realloc.num_far = estimate_reax_lists();
+    
+    // check if I need to shrink/extend my data-structs
+
+    ReAllocate( system, control, data, workspace, &lists, mpi_data );
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::write_reax_atoms()
+{
+  int *num_bonds = fix_reax->num_bonds;
+  int *num_hbonds = fix_reax->num_hbonds;
+  
+  if (system->N > system->total_cap)
+    error->all(FLERR,"Too many ghost atoms");
+  
+#pragma omp parallel for schedule(static) default(shared)
+  for( int i = 0; i < system->N; ++i ){
+    system->my_atoms[i].orig_id = atom->tag[i];
+    system->my_atoms[i].type = map[atom->type[i]];
+    system->my_atoms[i].x[0] = atom->x[i][0];
+    system->my_atoms[i].x[1] = atom->x[i][1];
+    system->my_atoms[i].x[2] = atom->x[i][2];
+    system->my_atoms[i].q = atom->q[i];
+    system->my_atoms[i].num_bonds = num_bonds[i];
+    system->my_atoms[i].num_hbonds = num_hbonds[i];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairReaxCOMP::estimate_reax_lists()
+{
+  int i;  
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int numall = list->inum + list->gnum;
+  int mincap = system->mincap;
+  
+  // for good performance in the OpenMP implementation, each thread needs
+  // to know where to place the neighbors of the atoms it is responsible for.
+  // The sumscan values for the list->numneigh will be used to determine the
+  // neighbor offset of each atom. Note that this may cause some significant 
+  // memory overhead if delayed neighboring is used - so it may be desirable 
+  // to work on this part to reduce the memory footprint of the far_nbrs list.
+  
+  int num_nbrs = 0;
+  
+  for (int itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    num_nbrs += numneigh[i];
+  }
+  
+  int new_estimate = MAX (num_nbrs, mincap*MIN_NBRS);
+  
+  return new_estimate;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairReaxCOMP::write_reax_lists()
+{
+#ifdef OMP_TIMING
+  double startTimeBase, endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  int itr_i, itr_j, i, j, num_mynbrs;
+  int *jlist;
+  double d_sqr, dist, cutoff_sqr;
+  rvec dvec;
+  
+  double **x = atom->x;
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  reax_list *far_nbrs = lists + FAR_NBRS;
+  far_neighbor_data *far_list = far_nbrs->select.far_nbr_list;
+  
+  int num_nbrs = 0;
+  int inum = list->inum;
+  int gnum = list->gnum;
+  int numall = inum + gnum;
+  
+  // sumscan of the number of neighbors per atom to determine the offsets
+  // most likely, we are overallocating. desirable to work on this part
+  // to reduce the memory footprint of the far_nbrs list.
+  
+  num_nbrs = 0;
+  
+  for (itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    num_nbrs_offset[i] = num_nbrs;
+    num_nbrs += numneigh[i];
+  }
+
+//#pragma omp parallel for schedule(guided) default(shared)	   
+#pragma omp parallel for schedule(dynamic,50) default(shared)		\
+        private(itr_i, itr_j, i, j, jlist, cutoff_sqr, num_mynbrs, d_sqr, dvec, dist)
+  for (itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    jlist = firstneigh[i];
+    Set_Start_Index( i, num_nbrs_offset[i], far_nbrs );
+    
+    if (i < inum) 
+      cutoff_sqr = control->nonb_cut*control->nonb_cut;
+    else 
+      cutoff_sqr = control->bond_cut*control->bond_cut;      
+    
+    num_mynbrs = 0;
+    
+    for (itr_j = 0; itr_j < numneigh[i]; ++itr_j) {
+      j = jlist[itr_j];
+      j &= NEIGHMASK;
+      get_distance( x[j], x[i], &d_sqr, &dvec );
+      
+      if (d_sqr <= cutoff_sqr) {
+        dist = sqrt( d_sqr );
+        set_far_nbr( &far_list[num_nbrs_offset[i] + num_mynbrs], j, dist, dvec );
+        ++num_mynbrs;
+      }
+    }
+    Set_End_Index( i, num_nbrs_offset[i] + num_mynbrs, far_nbrs );
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEWLINDEX] += (endTimeBase-startTimeBase);
+#endif
+  
+  return num_nbrs;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::read_reax_forces(int vflag)
+{
+#pragma omp parallel for schedule(static) default(shared)
+  for( int i = 0; i < system->N; ++i ) {
+    system->my_atoms[i].f[0] = workspace->f[i][0];
+    system->my_atoms[i].f[1] = workspace->f[i][1];
+    system->my_atoms[i].f[2] = workspace->f[i][2];
+
+    atom->f[i][0] = -workspace->f[i][0];
+    atom->f[i][1] = -workspace->f[i][1];
+    atom->f[i][2] = -workspace->f[i][2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairReaxCOMP::FindBond()
+{
+  int i, ii, j, pj, jtag, nj, jtmp, jj;
+  double bo_tmp, bo_cut, rij, rsq;
+
+  bond_data *bo_ij;
+  bo_cut = 0.10;
+
+#pragma omp parallel for schedule(static) default(shared) \
+  private(i, nj, pj, bo_ij, j, bo_tmp)
+  for (i = 0; i < system->n; i++) {
+    nj = 0;
+    for( pj = Start_Index(i, lists); pj < End_Index(i, lists); ++pj ) {
+      bo_ij = &( lists->select.bond_list[pj] );
+      j = bo_ij->nbr;
+      if (j < i) continue;
+
+      bo_tmp = bo_ij->bo_data.BO;
+
+      if (bo_tmp >= bo_cut ) {
+	tmpid[i][nj] = j;
+	tmpbo[i][nj] = bo_tmp;
+	nj ++;
+	if (nj > MAXSPECBOND) error->all(FLERR,"Increase MAXSPECBOND in fix_reaxc_species.h");
+      }
+    }
+  }
+}
+
diff --git a/src/USER-OMP/pair_reaxc_omp.h b/src/USER-OMP/pair_reaxc_omp.h
new file mode 100644
index 0000000000..5d5c7e145b
--- /dev/null
+++ b/src/USER-OMP/pair_reaxc_omp.h
@@ -0,0 +1,113 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Hasan Metin Aktulga, Purdue University
+   (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
+
+   Please cite the related publication:
+   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+   "Parallel Reactive Molecular Dynamics: Numerical Methods and
+   Algorithmic Techniques", Parallel Computing, in press.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(reax/c/omp,PairReaxCOMP)
+
+#else
+
+#ifndef LMP_PAIR_REAXC_OMP_H
+#define LMP_PAIR_REAXC_OMP_H
+
+#include "pair_reaxc.h"
+#include "thr_omp.h"
+#include "suffix.h"
+
+namespace LAMMPS_NS {
+
+class PairReaxCOMP : public PairReaxC, public ThrOMP {
+ public:
+  PairReaxCOMP(class LAMMPS *);
+  ~PairReaxCOMP();
+  virtual void compute(int, int);
+  virtual void init_style();
+
+  inline FixOMP *getFixOMP() {
+	return fix;
+  };
+
+  inline void ev_setup_thr_proxy(int eflagparm, int vflagparm, int nallparm, 
+				 double *eatomparm, double **vatomparm, ThrData *thrparm) {
+    ev_setup_thr(eflagparm, vflagparm, nallparm, eatomparm, vatomparm, thrparm);
+  };
+  
+  // reduce per thread data as needed
+  inline void reduce_thr_proxy(void * const styleparm, const int eflagparm, 
+			       const int vflagparm, ThrData * const thrparm) {
+    reduce_thr(styleparm, eflagparm, vflagparm, thrparm);
+  }
+
+  inline void ev_tally_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, 
+				 const int nlocalparm, const int newton_pairparm, 
+				 const double evdwlparm, const double ecoulparm,
+				 const double fpairparm, const double delxparm, 
+				 const double delyparm, const double delzparm, 
+				 ThrData * const thrparm) {
+    ev_tally_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, 
+		 evdwlparm, ecoulparm, fpairparm, delxparm, delyparm, delzparm, thrparm);
+  }
+
+  inline void ev_tally_xyz_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, 
+				     const int nlocalparm, const int newton_pairparm, 
+				     const double evdwlparm, const double ecoulparm, 
+				     const double fxparm, const double fyparm, const double fzparm,
+				     const double delxparm, const double delyparm, 
+				     const double delzparm, ThrData * const thrparm) {
+    ev_tally_xyz_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, 
+		     evdwlparm, ecoulparm, fxparm, fyparm, fzparm, 
+		     delxparm, delyparm, delzparm, thrparm);
+  }
+  
+  inline void ev_tally3_thr_proxy(Pair * const pairparm,int i, int j, int k, 
+				  double evdwl, double ecoul, double *fj, double *fk, 
+				  double *drji, double *drki, ThrData * const thrparm) {
+    ev_tally3_thr(pairparm, i, j, k, evdwl, ecoul, fj, fk, drji, drki, thrparm);
+  }
+
+ protected:
+  virtual void setup();
+  virtual void write_reax_atoms();
+  virtual int estimate_reax_lists();
+  virtual int write_reax_lists();
+  virtual void read_reax_forces(int);
+  virtual void FindBond();
+
+  // work array used in write_reax_lists()
+  int * num_nbrs_offset;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Too many ghost atoms
+
+Number of ghost atoms has increased too much during simulation and has exceeded
+the size of reax/c arrays.  Increase safe_zone and min_cap in pair_style reax/c
+command
+
+*/
diff --git a/src/USER-OMP/reaxc_bond_orders_omp.cpp b/src/USER-OMP/reaxc_bond_orders_omp.cpp
new file mode 100644
index 0000000000..1d32cbd34f
--- /dev/null
+++ b/src/USER-OMP/reaxc_bond_orders_omp.cpp
@@ -0,0 +1,711 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include  <omp.h>
+#include "pair_reaxc_omp.h"
+#include "reaxc_types.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj,
+			     storage *workspace, reax_list **lists ) {
+  reax_list *bonds = (*lists) + BONDS;
+  bond_data *nbr_j, *nbr_k;
+  bond_order_data *bo_ij, *bo_ji;
+  dbond_coefficients coef;
+  int pk, k, j;
+
+  PairReaxCOMP *pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  
+  int tid = omp_get_thread_num();
+  ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+  long reductionOffset = (system->N * tid);
+  
+  /* Virial Tallying variables */
+  double f_scaler;
+  rvec fi_tmp, fj_tmp, fk_tmp, delij, delji, delki, delkj, temp;
+
+  /* Initializations */
+  nbr_j = &(bonds->select.bond_list[pj]);
+  j = nbr_j->nbr;
+  bo_ij = &(nbr_j->bo_data);
+  bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data);
+
+  double c = bo_ij->Cdbo + bo_ji->Cdbo;
+  coef.C1dbo = bo_ij->C1dbo * c;
+  coef.C2dbo = bo_ij->C2dbo * c;
+  coef.C3dbo = bo_ij->C3dbo * c;
+
+  c = bo_ij->Cdbopi + bo_ji->Cdbopi;
+  coef.C1dbopi = bo_ij->C1dbopi * c;
+  coef.C2dbopi = bo_ij->C2dbopi * c;
+  coef.C3dbopi = bo_ij->C3dbopi * c;
+  coef.C4dbopi = bo_ij->C4dbopi * c;
+
+  c = bo_ij->Cdbopi2 + bo_ji->Cdbopi2;
+  coef.C1dbopi2 = bo_ij->C1dbopi2 * c;
+  coef.C2dbopi2 = bo_ij->C2dbopi2 * c;
+  coef.C3dbopi2 = bo_ij->C3dbopi2 * c;
+  coef.C4dbopi2 = bo_ij->C4dbopi2 * c;
+
+  c = workspace->CdDelta[i] + workspace->CdDelta[j];
+  coef.C1dDelta = bo_ij->C1dbo * c;
+  coef.C2dDelta = bo_ij->C2dbo * c;
+  coef.C3dDelta = bo_ij->C3dbo * c;
+
+  // The same "c" refactoring here can be replicated below in Add_dBond_to_Forces_NPTOMP(), but
+  // I'd prefer to wait for a test to verify changes before doing so (just to be safe).
+
+  // forces on i
+  // rvec_Scale(     temp, coef.C1dbo,    bo_ij->dBOp );
+  // rvec_ScaledAdd( temp, coef.C2dbo,    workspace->dDeltap_self[i] );
+  // rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp );
+  // rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] );
+  // rvec_ScaledAdd( temp, coef.C1dbopi,  bo_ij->dln_BOp_pi );
+  // rvec_ScaledAdd( temp, coef.C2dbopi,  bo_ij->dBOp );
+  // rvec_ScaledAdd( temp, coef.C3dbopi,  workspace->dDeltap_self[i]);
+  // rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 );
+  // rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp );
+  // rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] );
+
+  c = (coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2);
+  rvec_Scale(     temp, c,    bo_ij->dBOp );
+
+  c = (coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2);
+  rvec_ScaledAdd( temp, c,    workspace->dDeltap_self[i] );
+
+  rvec_ScaledAdd( temp, coef.C1dbopi,  bo_ij->dln_BOp_pi );
+  rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 );
+
+  rvec_Add(workspace->forceReduction[reductionOffset+i],temp );
+
+  if( system->pair_ptr->vflag_atom) {
+    rvec_Scale(fi_tmp, -1.0, temp);
+    rvec_ScaledSum( delij, 1., system->my_atoms[i].x,-1., system->my_atoms[j].x );
+
+    pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,i,j,system->N,0,0,0,
+					  fi_tmp[0],fi_tmp[1],fi_tmp[2],
+					  delij[0],delij[1],delij[2],thr);
+  }
+
+  // forces on j
+  // rvec_Scale(     temp, -coef.C1dbo,    bo_ij->dBOp );
+  // rvec_ScaledAdd( temp,  coef.C3dbo,    workspace->dDeltap_self[j] );
+  // rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp );
+  // rvec_ScaledAdd( temp,  coef.C3dDelta, workspace->dDeltap_self[j]);
+  // rvec_ScaledAdd( temp, -coef.C1dbopi,  bo_ij->dln_BOp_pi );
+  // rvec_ScaledAdd( temp, -coef.C2dbopi,  bo_ij->dBOp );
+  // rvec_ScaledAdd( temp,  coef.C4dbopi,  workspace->dDeltap_self[j]);
+  // rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 );
+  // rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp );
+  // rvec_ScaledAdd( temp,  coef.C4dbopi2, workspace->dDeltap_self[j]);
+
+
+  c = -(coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2);
+  rvec_Scale(     temp, c,    bo_ij->dBOp );
+
+  c = (coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2);
+  rvec_ScaledAdd( temp,  c,    workspace->dDeltap_self[j] );
+
+  rvec_ScaledAdd( temp, -coef.C1dbopi,  bo_ij->dln_BOp_pi );
+  rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 );
+
+
+  rvec_Add(workspace->forceReduction[reductionOffset+j],temp );
+
+  if( system->pair_ptr->vflag_atom) {
+    rvec_Scale(fj_tmp, -1.0, temp);
+    rvec_ScaledSum( delji, 1., system->my_atoms[j].x,-1., system->my_atoms[i].x );
+
+    pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,j,i,system->N,0,0,0,
+					  fj_tmp[0],fj_tmp[1],fj_tmp[2],
+					  delji[0],delji[1],delji[2],thr);
+  }
+
+  // forces on k: i neighbor
+  for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) {
+    nbr_k = &(bonds->select.bond_list[pk]);
+    k = nbr_k->nbr;
+    
+    // rvec_Scale(     temp, -coef.C2dbo,    nbr_k->bo_data.dBOp);
+    // rvec_ScaledAdd( temp, -coef.C2dDelta, nbr_k->bo_data.dBOp);
+    // rvec_ScaledAdd( temp, -coef.C3dbopi,  nbr_k->bo_data.dBOp);
+    // rvec_ScaledAdd( temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp);
+
+    const double c = -(coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2);
+    rvec_Scale(temp, c, nbr_k->bo_data.dBOp);
+
+    rvec_Add(workspace->forceReduction[reductionOffset+k],temp );
+
+    if( system->pair_ptr->vflag_atom ) {
+      rvec_Scale(fk_tmp, -1.0, temp);
+      rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x);
+
+      pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0,
+					    fk_tmp[0],fk_tmp[1],fk_tmp[2],
+					    delki[0],delki[1],delki[2],thr);
+      rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x);
+
+      pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0,
+					    fk_tmp[0],fk_tmp[1],fk_tmp[2],
+					    delkj[0],delkj[1],delkj[2],thr);
+    }
+  }
+  
+  // forces on k: j neighbor
+  for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) {
+    nbr_k = &(bonds->select.bond_list[pk]);
+    k = nbr_k->nbr;
+    
+    // rvec_Scale(     temp, -coef.C3dbo,    nbr_k->bo_data.dBOp );
+    // rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp);
+    // rvec_ScaledAdd( temp, -coef.C4dbopi,  nbr_k->bo_data.dBOp);
+    // rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp);
+
+    const double c = -(coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2);
+    rvec_Scale(temp, c, nbr_k->bo_data.dBOp);
+
+    rvec_Add(workspace->forceReduction[reductionOffset+k],temp );
+
+    if( system->pair_ptr->vflag_atom ) {
+      rvec_Scale(fk_tmp, -1.0, temp);
+      rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x);
+
+      pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0,
+					    fk_tmp[0],fk_tmp[1],fk_tmp[2],
+					    delki[0],delki[1],delki[2],thr);
+      
+      rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x);
+
+      pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0,
+					    fk_tmp[0],fk_tmp[1],fk_tmp[2],
+					    delkj[0],delkj[1],delkj[2],thr);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Add_dBond_to_Forces_NPTOMP( reax_system *system, int i, int pj, simulation_data *data,
+				 storage *workspace, reax_list **lists ) {
+  reax_list *bonds = (*lists) + BONDS;
+  bond_data *nbr_j, *nbr_k;
+  bond_order_data *bo_ij, *bo_ji;
+  dbond_coefficients coef;
+  rvec temp, ext_press;
+  ivec rel_box;
+  int pk, k, j;
+
+  PairReaxCOMP *pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+
+  int tid = omp_get_thread_num();
+  ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+  long reductionOffset = (system->N * tid);
+
+  /* Initializations */
+  nbr_j = &(bonds->select.bond_list[pj]);
+  j = nbr_j->nbr;
+  bo_ij = &(nbr_j->bo_data);
+  bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data);
+
+  coef.C1dbo = bo_ij->C1dbo * (bo_ij->Cdbo + bo_ji->Cdbo);
+  coef.C2dbo = bo_ij->C2dbo * (bo_ij->Cdbo + bo_ji->Cdbo);
+  coef.C3dbo = bo_ij->C3dbo * (bo_ij->Cdbo + bo_ji->Cdbo);
+
+  coef.C1dbopi = bo_ij->C1dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi);
+  coef.C2dbopi = bo_ij->C2dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi);
+  coef.C3dbopi = bo_ij->C3dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi);
+  coef.C4dbopi = bo_ij->C4dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi);
+
+  coef.C1dbopi2 = bo_ij->C1dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2);
+  coef.C2dbopi2 = bo_ij->C2dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2);
+  coef.C3dbopi2 = bo_ij->C3dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2);
+  coef.C4dbopi2 = bo_ij->C4dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2);
+
+  coef.C1dDelta = bo_ij->C1dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]);
+  coef.C2dDelta = bo_ij->C2dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]);
+  coef.C3dDelta = bo_ij->C3dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]);
+
+
+  /************************************
+   * forces related to atom i          *
+   * first neighbors of atom i         *
+   ************************************/
+  for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) {
+    nbr_k = &(bonds->select.bond_list[pk]);
+    k = nbr_k->nbr;
+
+    rvec_Scale(temp, -coef.C2dbo, nbr_k->bo_data.dBOp);       /*2nd, dBO*/
+    rvec_ScaledAdd(temp, -coef.C2dDelta, nbr_k->bo_data.dBOp);/*dDelta*/
+    rvec_ScaledAdd(temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); /*3rd, dBOpi*/
+    rvec_ScaledAdd(temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp);/*3rd, dBOpi2*/
+
+    /* force */
+    rvec_Add(workspace->forceReduction[reductionOffset+k],temp );
+
+    /* pressure */
+    rvec_iMultiply( ext_press, nbr_k->rel_box, temp );
+    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+  }
+
+  /* then atom i itself  */
+  rvec_Scale( temp, coef.C1dbo, bo_ij->dBOp );                      /*1st,dBO*/
+  rvec_ScaledAdd( temp, coef.C2dbo, workspace->dDeltap_self[i] );   /*2nd,dBO*/
+  rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp );               /*1st,dBO*/
+  rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] );/*2nd,dBO*/
+  rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi );        /*1st,dBOpi*/
+  rvec_ScaledAdd( temp, coef.C2dbopi, bo_ij->dBOp );              /*2nd,dBOpi*/
+  rvec_ScaledAdd( temp, coef.C3dbopi, workspace->dDeltap_self[i]);/*3rd,dBOpi*/
+
+  rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 );  /*1st,dBO_pi2*/
+  rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp );         /*2nd,dBO_pi2*/
+  rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] );/*3rd*/
+
+  /* force */
+  rvec_Add(workspace->forceReduction[reductionOffset+i],temp );
+
+  for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) {
+    nbr_k = &(bonds->select.bond_list[pk]);
+    k = nbr_k->nbr;
+
+    rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp );      /*3rd,dBO*/
+    rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp);/*dDelta*/
+    rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); /*4th,dBOpi*/
+    rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp);/*4th,dBOpi2*/
+
+    /* force */
+    rvec_Add(workspace->forceReduction[reductionOffset+k],temp );
+
+    /* pressure */
+    if( k != i ) {
+      ivec_Sum( rel_box, nbr_k->rel_box, nbr_j->rel_box ); //rel_box(k, i)
+      rvec_iMultiply( ext_press, rel_box, temp );
+      rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+    }
+  }
+
+  /* then atom j itself */
+  rvec_Scale( temp, -coef.C1dbo, bo_ij->dBOp );                    /*1st, dBO*/
+  rvec_ScaledAdd( temp, coef.C3dbo, workspace->dDeltap_self[j] );  /*2nd, dBO*/
+  rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp );             /*1st, dBO*/
+  rvec_ScaledAdd( temp, coef.C3dDelta, workspace->dDeltap_self[j]);/*2nd, dBO*/
+
+  rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi );       /*1st,dBOpi*/
+  rvec_ScaledAdd( temp, -coef.C2dbopi, bo_ij->dBOp );             /*2nd,dBOpi*/
+  rvec_ScaledAdd( temp, coef.C4dbopi, workspace->dDeltap_self[j]);/*3rd,dBOpi*/
+
+  rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 );    /*1st,dBOpi2*/
+  rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp );           /*2nd,dBOpi2*/
+  rvec_ScaledAdd( temp,coef.C4dbopi2,workspace->dDeltap_self[j]);/*3rd,dBOpi2*/
+
+  /* force */
+  rvec_Add(workspace->forceReduction[reductionOffset+j],temp );
+
+  /* pressure */
+  rvec_iMultiply( ext_press, nbr_j->rel_box, temp );
+    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+}
+
+/* ---------------------------------------------------------------------- */
+
+int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut,
+	     int i, int btop_i, far_neighbor_data *nbr_pj,
+	     single_body_parameters *sbp_i, single_body_parameters *sbp_j,
+	     two_body_parameters *twbp,
+	     int btop_j, double C12, double C34, double C56, double BO, double BO_s, double BO_pi, double BO_pi2) {
+  int j;
+  double rr2;
+  double Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2;
+  bond_data *ibond, *jbond;
+  bond_order_data *bo_ij, *bo_ji;
+
+  j = nbr_pj->nbr;
+  rr2 = 1.0 / SQR(nbr_pj->d);
+
+  // Top portion of BOp() moved to reaxc_forces_omp.cpp::Init_Forces_noQEq_OMP()
+
+  /* Initially BO values are the uncorrected ones, page 1 */
+
+  /****** bonds i-j and j-i ******/
+  ibond = &( bonds->select.bond_list[btop_i] );
+  jbond = &( bonds->select.bond_list[btop_j] );
+  
+  ibond->nbr = j;
+  jbond->nbr = i;
+  ibond->d = nbr_pj->d;
+  jbond->d = nbr_pj->d;
+  rvec_Copy( ibond->dvec, nbr_pj->dvec );
+  rvec_Scale( jbond->dvec, -1, nbr_pj->dvec );
+  ivec_Copy( ibond->rel_box, nbr_pj->rel_box );
+  ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box );
+  ibond->dbond_index = btop_i;
+  jbond->dbond_index = btop_i;
+  ibond->sym_index = btop_j;
+  jbond->sym_index = btop_i;
+  
+  bo_ij = &( ibond->bo_data );
+  bo_ji = &( jbond->bo_data );
+  bo_ji->BO     = bo_ij->BO     = BO;
+  bo_ji->BO_s   = bo_ij->BO_s   = BO_s;
+  bo_ji->BO_pi  = bo_ij->BO_pi  = BO_pi;
+  bo_ji->BO_pi2 = bo_ij->BO_pi2 = BO_pi2;
+  
+  /* Bond Order page2-3, derivative of total bond order prime */
+  Cln_BOp_s   = twbp->p_bo2 * C12 * rr2;
+  Cln_BOp_pi  = twbp->p_bo4 * C34 * rr2;
+  Cln_BOp_pi2 = twbp->p_bo6 * C56 * rr2;
+  
+  /* Only dln_BOp_xx wrt. dr_i is stored here, note that
+     dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */
+  rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec);
+  rvec_Scale(bo_ij->dln_BOp_pi,-bo_ij->BO_pi*Cln_BOp_pi,ibond->dvec);
+  rvec_Scale(bo_ij->dln_BOp_pi2,
+	     -bo_ij->BO_pi2*Cln_BOp_pi2,ibond->dvec);
+  rvec_Scale(bo_ji->dln_BOp_s,   -1., bo_ij->dln_BOp_s);
+  rvec_Scale(bo_ji->dln_BOp_pi,  -1., bo_ij->dln_BOp_pi );
+  rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 );
+  
+  rvec_Scale( bo_ij->dBOp,
+	      -(bo_ij->BO_s * Cln_BOp_s +
+		bo_ij->BO_pi * Cln_BOp_pi +
+		bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec );
+  rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp );
+  
+  bo_ij->BO_s -= bo_cut;
+  bo_ij->BO   -= bo_cut;
+  bo_ji->BO_s -= bo_cut;
+  bo_ji->BO   -= bo_cut;
+  
+  bo_ij->Cdbo = bo_ij->Cdbopi = bo_ij->Cdbopi2 = 0.0;
+  bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0;
+  
+  return 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BOOMP( reax_system *system, control_params *control, simulation_data *data,
+	    storage *workspace, reax_list **lists, output_controls *out_control ) 
+{
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  double p_lp1 = system->reax_param.gp.l[15];
+  int  num_bonds = 0;
+  double p_boc1 = system->reax_param.gp.l[0];
+  double p_boc2 = system->reax_param.gp.l[1];
+  reax_list *bonds = (*lists) + BONDS;
+  int  natoms = system->N;
+  int  nthreads = control->nthreads;
+  
+#pragma omp parallel default(shared)
+  {
+    int  i, j, pj, type_i, type_j;
+    int  start_i, end_i, sym_index;
+    double val_i, Deltap_i, Deltap_boc_i;
+    double val_j, Deltap_j, Deltap_boc_j;
+    double f1, f2, f3, f4, f5, f4f5, exp_f4, exp_f5;
+    double exp_p1i, exp_p2i, exp_p1j, exp_p2j, explp1;
+    double temp, u1_ij, u1_ji, Cf1A_ij, Cf1B_ij, Cf1_ij, Cf1_ji;
+    double Cf45_ij, Cf45_ji; //u_ij, u_ji
+    double A0_ij, A1_ij, A2_ij, A2_ji, A3_ij, A3_ji;
+    single_body_parameters *sbp_i, *sbp_j;
+    two_body_parameters *twbp;
+    bond_order_data *bo_ij, *bo_ji;
+    
+    int tid = omp_get_thread_num();
+    
+    /* Calculate Deltaprime, Deltaprime_boc values */
+#pragma omp for schedule(static)
+    for (i = 0; i < system->N; ++i) {
+      type_i = system->my_atoms[i].type;
+      if(type_i < 0) continue;
+      sbp_i = &(system->reax_param.sbp[type_i]);
+      workspace->Deltap[i] = workspace->total_bond_order[i] - sbp_i->valency;
+      workspace->Deltap_boc[i] =
+	workspace->total_bond_order[i] - sbp_i->valency_val;
+
+      workspace->total_bond_order[i] = 0;
+    }
+
+    // Wait till initialization complete
+#pragma omp barrier
+    
+    /* Corrected Bond Order calculations */
+//#pragma omp for schedule(dynamic,50)
+#pragma omp for schedule(guided)
+    for (i = 0; i < system->N; ++i) {
+      type_i = system->my_atoms[i].type;
+      if(type_i < 0) continue;
+      sbp_i = &(system->reax_param.sbp[type_i]);
+      val_i = sbp_i->valency;
+      Deltap_i = workspace->Deltap[i];
+      Deltap_boc_i = workspace->Deltap_boc[i];
+      start_i = Start_Index(i, bonds);
+      end_i = End_Index(i, bonds);
+      
+      for (pj = start_i; pj < end_i; ++pj) {
+	j = bonds->select.bond_list[pj].nbr;
+	type_j = system->my_atoms[j].type;
+	if(type_j < 0) continue;
+	bo_ij = &( bonds->select.bond_list[pj].bo_data );
+
+	if( i < j || workspace->bond_mark[j] > 3) {
+	  twbp = &( system->reax_param.tbp[type_i][type_j] );
+
+	  if( twbp->ovc < 0.001 && twbp->v13cor < 0.001 ) {
+	    bo_ij->C1dbo = 1.000000;
+	    bo_ij->C2dbo = 0.000000;
+	    bo_ij->C3dbo = 0.000000;
+	    
+	    bo_ij->C1dbopi = bo_ij->BO_pi;
+	    bo_ij->C2dbopi = 0.000000;
+	    bo_ij->C3dbopi = 0.000000;
+	    bo_ij->C4dbopi = 0.000000;
+	    
+	    bo_ij->C1dbopi2 = bo_ij->BO_pi2;
+	    bo_ij->C2dbopi2 = 0.000000;
+	    bo_ij->C3dbopi2 = 0.000000;
+	    bo_ij->C4dbopi2 = 0.000000;
+	    
+	  }
+	  else {
+	    val_j = system->reax_param.sbp[type_j].valency;
+	    Deltap_j = workspace->Deltap[j];
+	    Deltap_boc_j = workspace->Deltap_boc[j];
+	  
+	    /* on page 1 */
+	    if( twbp->ovc >= 0.001 ) {
+	      /* Correction for overcoordination */
+	      exp_p1i = exp( -p_boc1 * Deltap_i );
+	      exp_p2i = exp( -p_boc2 * Deltap_i );
+	      exp_p1j = exp( -p_boc1 * Deltap_j );
+	      exp_p2j = exp( -p_boc2 * Deltap_j );
+	      
+	      f2 = exp_p1i + exp_p1j;
+	      f3 = -1.0 / p_boc2 * log( 0.5 * ( exp_p2i  + exp_p2j ) );
+	      f1 = 0.5 * ( ( val_i + f2 )/( val_i + f2 + f3 ) +
+			   ( val_j + f2 )/( val_j + f2 + f3 ) );
+	      
+	      /* Now come the derivates */
+	      /* Bond Order pages 5-7, derivative of f1 */
+	      temp = f2 + f3;
+	      u1_ij = val_i + temp;
+	      u1_ji = val_j + temp;
+	      Cf1A_ij = 0.5 * f3 * (1.0 / SQR( u1_ij ) +
+				    1.0 / SQR( u1_ji ));
+	      Cf1B_ij = -0.5 * (( u1_ij - f3 ) / SQR( u1_ij ) +
+				( u1_ji - f3 ) / SQR( u1_ji ));
+	      
+	      Cf1_ij = 0.50 * ( -p_boc1 * exp_p1i / u1_ij -
+				((val_i+f2) / SQR(u1_ij)) *
+				( -p_boc1 * exp_p1i +
+				  exp_p2i / ( exp_p2i + exp_p2j ) ) +
+				-p_boc1 * exp_p1i / u1_ji -
+				((val_j+f2) / SQR(u1_ji)) *
+				( -p_boc1 * exp_p1i +
+				  exp_p2i / ( exp_p2i + exp_p2j ) ));
+	      
+	      
+	      Cf1_ji = -Cf1A_ij * p_boc1 * exp_p1j +
+		Cf1B_ij * exp_p2j / ( exp_p2i + exp_p2j );
+	    }
+	    else {
+	      /* No overcoordination correction! */
+	      f1 = 1.0;
+	      Cf1_ij = Cf1_ji = 0.0;
+	    }
+
+	    if( twbp->v13cor >= 0.001 ) {
+	      /* Correction for 1-3 bond orders */
+	      exp_f4 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) -
+			    Deltap_boc_i) * twbp->p_boc3 + twbp->p_boc5);
+	      exp_f5 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) -
+			    Deltap_boc_j) * twbp->p_boc3 + twbp->p_boc5);
+	      
+	      f4 = 1. / (1. + exp_f4);
+	      f5 = 1. / (1. + exp_f5);
+	      f4f5 = f4 * f5;
+	      
+	      /* Bond Order pages 8-9, derivative of f4 and f5 */
+	      Cf45_ij = -f4 * exp_f4;
+	      Cf45_ji = -f5 * exp_f5;
+	    }
+	    else {
+	      f4 = f5 = f4f5 = 1.0;
+	      Cf45_ij = Cf45_ji = 0.0;
+	    }
+	    
+	    /* Bond Order page 10, derivative of total bond order */
+	    A0_ij = f1 * f4f5;
+	    A1_ij = -2 * twbp->p_boc3 * twbp->p_boc4 * bo_ij->BO *
+	      (Cf45_ij + Cf45_ji);
+	    A2_ij = Cf1_ij / f1 + twbp->p_boc3 * Cf45_ij;
+	    A2_ji = Cf1_ji / f1 + twbp->p_boc3 * Cf45_ji;
+	    A3_ij = A2_ij + Cf1_ij / f1;
+	    A3_ji = A2_ji + Cf1_ji / f1;
+	    
+	    /* find corrected bond orders and their derivative coef */
+	    bo_ij->BO    = bo_ij->BO    * A0_ij;
+	    bo_ij->BO_pi = bo_ij->BO_pi * A0_ij *f1;
+	    bo_ij->BO_pi2= bo_ij->BO_pi2* A0_ij *f1;
+	    bo_ij->BO_s  = bo_ij->BO - ( bo_ij->BO_pi + bo_ij->BO_pi2 );
+	    
+	    bo_ij->C1dbo = A0_ij + bo_ij->BO * A1_ij;
+	    bo_ij->C2dbo = bo_ij->BO * A2_ij;
+	    bo_ij->C3dbo = bo_ij->BO * A2_ji;
+	    
+	    bo_ij->C1dbopi = f1*f1*f4*f5;
+	    bo_ij->C2dbopi = bo_ij->BO_pi * A1_ij;
+	    bo_ij->C3dbopi = bo_ij->BO_pi * A3_ij;
+	    bo_ij->C4dbopi = bo_ij->BO_pi * A3_ji;
+	    
+	    bo_ij->C1dbopi2 = f1*f1*f4*f5;
+	    bo_ij->C2dbopi2 = bo_ij->BO_pi2 * A1_ij;
+	    bo_ij->C3dbopi2 = bo_ij->BO_pi2 * A3_ij;
+	    bo_ij->C4dbopi2 = bo_ij->BO_pi2 * A3_ji;
+	  }
+	  
+	  /* neglect bonds that are < 1e-10 */
+	  if( bo_ij->BO < 1e-10 )
+	    bo_ij->BO = 0.0;
+	  if( bo_ij->BO_s < 1e-10 )
+	    bo_ij->BO_s = 0.0;
+	  if( bo_ij->BO_pi < 1e-10 )
+	    bo_ij->BO_pi = 0.0;
+	  if( bo_ij->BO_pi2 < 1e-10 )
+	    bo_ij->BO_pi2 = 0.0;
+	  
+	  workspace->total_bond_order[i] += bo_ij->BO; //now keeps total_BO
+	}
+	// else {
+	//   /* We only need to update bond orders from bo_ji
+	//      everything else is set in uncorrected_bo calculations */
+	//   sym_index = bonds->select.bond_list[pj].sym_index;
+	//   bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data);
+	//   bo_ij->BO = bo_ji->BO;
+	//   bo_ij->BO_s = bo_ji->BO_s;
+	//   bo_ij->BO_pi = bo_ji->BO_pi;
+	//   bo_ij->BO_pi2 = bo_ji->BO_pi2;
+	  
+	//   workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO
+	// }
+      }
+      
+    }
+
+    // Wait for bo_ij to be updated
+#pragma omp barrier
+
+    // Try to combine the following for-loop back into the for-loop above
+    /*-------------------------*/
+#pragma omp for schedule(guided)
+    for (i = 0; i < system->N; ++i) {
+      type_i = system->my_atoms[i].type;
+      if(type_i < 0) continue;
+      start_i = Start_Index(i, bonds);
+      end_i = End_Index(i, bonds);
+      
+      for (pj = start_i; pj < end_i; ++pj) {
+	j = bonds->select.bond_list[pj].nbr;
+	type_j = system->my_atoms[j].type;
+	if(type_j < 0) continue;
+
+	if( i < j || workspace->bond_mark[j] > 3) {
+	  // Computed in previous for-loop
+	} else {
+	  /* We only need to update bond orders from bo_ji
+	     everything else is set in uncorrected_bo calculations */
+	  sym_index = bonds->select.bond_list[pj].sym_index;
+	  
+	  bo_ij = &( bonds->select.bond_list[pj].bo_data );
+	  bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data);
+	  bo_ij->BO = bo_ji->BO;
+	  bo_ij->BO_s = bo_ji->BO_s;
+	  bo_ij->BO_pi = bo_ji->BO_pi;
+	  bo_ij->BO_pi2 = bo_ji->BO_pi2;
+	  
+	  workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO
+	}
+      }
+      
+    }
+
+    /*-------------------------*/
+    
+    // Need to wait for total_bond_order to be accumulated.
+#pragma omp barrier
+    
+    /* Calculate some helper variables that are  used at many places
+       throughout force calculations */
+#pragma omp for schedule(guided)
+    for(j = 0; j < system->N; ++j ) {
+      type_j = system->my_atoms[j].type;
+      if(type_j < 0) continue;
+      sbp_j = &(system->reax_param.sbp[ type_j ]);
+      
+      workspace->Delta[j] = workspace->total_bond_order[j] - sbp_j->valency;
+      workspace->Delta_e[j] = workspace->total_bond_order[j] - sbp_j->valency_e;
+      workspace->Delta_boc[j] = workspace->total_bond_order[j] -
+	sbp_j->valency_boc;
+      workspace->Delta_val[j] = workspace->total_bond_order[j] -
+	sbp_j->valency_val;
+      
+      workspace->vlpex[j] = workspace->Delta_e[j] -
+	2.0 * (int)(workspace->Delta_e[j]/2.0);
+      explp1 = exp(-p_lp1 * SQR(2.0 + workspace->vlpex[j]));
+      workspace->nlp[j] = explp1 - (int)(workspace->Delta_e[j] / 2.0);
+      workspace->Delta_lp[j] = sbp_j->nlp_opt - workspace->nlp[j];
+      workspace->Clp[j] = 2.0 * p_lp1 * explp1 * (2.0 + workspace->vlpex[j]);
+      workspace->dDelta_lp[j] = workspace->Clp[j];
+      
+      if( sbp_j->mass > 21.0 ) {
+	workspace->nlp_temp[j] = 0.5 * (sbp_j->valency_e - sbp_j->valency);
+	workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j];
+	workspace->dDelta_lp_temp[j] = 0.;
+      }
+      else {
+	workspace->nlp_temp[j] = workspace->nlp[j];
+	workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j];
+	workspace->dDelta_lp_temp[j] = workspace->Clp[j];
+      }
+    }
+    
+  } // parallel region
+
+#ifdef OMP_TIMING
+	endTimeBase = MPI_Wtime();
+	ompTimingData[COMPUTEBOINDEX] += (endTimeBase-startTimeBase);
+
+#endif
+}
diff --git a/src/USER-OMP/reaxc_bond_orders_omp.h b/src/USER-OMP/reaxc_bond_orders_omp.h
new file mode 100644
index 0000000000..272309cadd
--- /dev/null
+++ b/src/USER-OMP/reaxc_bond_orders_omp.h
@@ -0,0 +1,43 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __BOND_ORDERS_OMP_H_
+#define __BOND_ORDERS_OMP_H_
+
+#include "reaxc_types.h"
+#include "reaxc_bond_orders.h"
+
+void Add_dBond_to_ForcesOMP( reax_system*, int, int, storage*, reax_list** );
+void Add_dBond_to_Forces_NPTOMP( reax_system *system, int, int, simulation_data*,
+                              storage*, reax_list** );
+
+int BOp_OMP(storage*, reax_list*, double, int, int, far_neighbor_data*,
+	    single_body_parameters*, single_body_parameters*, two_body_parameters*,
+	    int, double, double, double, double, double, double, double);
+
+void BOOMP( reax_system*, control_params*, simulation_data*,
+         storage*, reax_list**, output_controls* );
+#endif
diff --git a/src/USER-OMP/reaxc_bonds_omp.cpp b/src/USER-OMP/reaxc_bonds_omp.cpp
new file mode 100644
index 0000000000..4dc666b3ee
--- /dev/null
+++ b/src/USER-OMP/reaxc_bonds_omp.cpp
@@ -0,0 +1,174 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include  <omp.h>
+
+#include "reaxc_bonds_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_tool_box.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondsOMP( reax_system *system, control_params *control,
+            simulation_data *data, storage *workspace, reax_list **lists,
+            output_controls *out_control )
+{
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  int natoms = system->n;
+  int nthreads = control->nthreads;
+  reax_list *bonds = (*lists) + BONDS;
+  double gp3 = system->reax_param.gp.l[3];
+  double gp4 = system->reax_param.gp.l[4];
+  double gp7 = system->reax_param.gp.l[7];
+  double gp10 = system->reax_param.gp.l[10];
+  double gp37 = (int) system->reax_param.gp.l[37];
+  double total_Ebond = 0.0;
+
+#pragma omp parallel default(shared) reduction(+: total_Ebond)
+ { 
+  int  i, j, pj;
+  int start_i, end_i;
+  int type_i, type_j;
+  double ebond, ebond_thr=0.0, pow_BOs_be2, exp_be12, CEbo;
+  double gp3, gp4, gp7, gp10, gp37;
+  double exphu, exphua1, exphub1, exphuov, hulpov, estriph, estriph_thr=0.0;
+  double decobdbo, decobdboua, decobdboub;
+  single_body_parameters *sbp_i, *sbp_j;
+  two_body_parameters *twbp;
+  bond_order_data *bo_ij;
+  int  tid = omp_get_thread_num();
+  long reductionOffset = (system->N * tid);
+  
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+  
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, 
+				    system->pair_ptr->vflag_either, natoms, 
+				    system->pair_ptr->eatom, system->pair_ptr->vatom, thr);
+  
+#pragma omp for schedule(guided)
+  for (i = 0; i < natoms; ++i) {
+    start_i = Start_Index(i, bonds);
+    end_i = End_Index(i, bonds);
+    
+    for (pj = start_i; pj < end_i; ++pj) {
+      j = bonds->select.bond_list[pj].nbr;
+      
+      if( system->my_atoms[i].orig_id > system->my_atoms[j].orig_id ) continue;
+
+      if( system->my_atoms[i].orig_id == system->my_atoms[j].orig_id ) {
+        if (system->my_atoms[j].x[2] <  system->my_atoms[i].x[2]) continue;
+      	if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && 
+      	    system->my_atoms[j].x[1] <  system->my_atoms[i].x[1]) continue;
+        if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && 
+      	    system->my_atoms[j].x[1] == system->my_atoms[i].x[1] && 
+      	    system->my_atoms[j].x[0] <  system->my_atoms[i].x[0]) continue;
+      }
+
+      /* set the pointers */
+      type_i = system->my_atoms[i].type;
+      type_j = system->my_atoms[j].type;
+      sbp_i = &( system->reax_param.sbp[type_i] );
+      sbp_j = &( system->reax_param.sbp[type_j] );
+      twbp = &( system->reax_param.tbp[type_i][type_j] );
+      bo_ij = &( bonds->select.bond_list[pj].bo_data );
+      
+      /* calculate the constants */
+      pow_BOs_be2 = pow( bo_ij->BO_s, twbp->p_be2 );
+      exp_be12 = exp( twbp->p_be1 * ( 1.0 - pow_BOs_be2 ) );
+      CEbo = -twbp->De_s * exp_be12 *
+	( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 );
+      
+      /* calculate the Bond Energy */      
+      total_Ebond += ebond =
+	-twbp->De_s * bo_ij->BO_s * exp_be12
+	-twbp->De_p * bo_ij->BO_pi
+	-twbp->De_pp * bo_ij->BO_pi2;
+      
+      /* tally into per-atom energy */
+      if (system->pair_ptr->evflag)
+	pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, 
+					  ebond, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+      
+      /* calculate derivatives of Bond Orders */
+      bo_ij->Cdbo += CEbo;
+      bo_ij->Cdbopi -= (CEbo + twbp->De_p);
+      bo_ij->Cdbopi2 -= (CEbo + twbp->De_pp);
+      
+      /* Stabilisation terminal triple bond */
+      if (bo_ij->BO >= 1.00) {
+	if (gp37 == 2 ||
+	    (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) ||
+	    (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990)) {
+	  exphu = exp( -gp7 * SQR(bo_ij->BO - 2.50) );
+	  exphua1 = exp(-gp3 * (workspace->total_bond_order[i]-bo_ij->BO));
+	  exphub1 = exp(-gp3 * (workspace->total_bond_order[j]-bo_ij->BO));
+	  exphuov = exp(gp4 * (workspace->Delta[i] + workspace->Delta[j]));
+	  hulpov = 1.0 / (1.0 + 25.0 * exphuov);
+	  
+	  estriph = gp10 * exphu * hulpov * (exphua1 + exphub1);
+	  total_Ebond += estriph;
+	  
+	  decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) *
+	    ( gp3 - 2.0 * gp7 * (bo_ij->BO-2.50) );
+	  decobdboua = -gp10 * exphu * hulpov *
+	    (gp3*exphua1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1));
+	  decobdboub = -gp10 * exphu * hulpov *
+	    (gp3*exphub1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1));
+	  
+	  /* tally into per-atom energy */
+	  if (system->pair_ptr->evflag)
+	    pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, 
+					      estriph, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+	  
+	  bo_ij->Cdbo += decobdbo;
+	  workspace->CdDelta[i] += decobdboua;
+	  workspace->CdDeltaReduction[reductionOffset+j] += decobdboub;
+        }
+      }
+    }
+  } // for(i)
+
+ } // omp
+ 
+ data->my_en.e_bond += total_Ebond;
+ 
+#ifdef OMP_TIMING
+ endTimeBase = MPI_Wtime();
+ ompTimingData[COMPUTEBONDSINDEX] += (endTimeBase-startTimeBase); 
+#endif
+
+}
diff --git a/src/USER-OMP/reaxc_bonds_omp.h b/src/USER-OMP/reaxc_bonds_omp.h
new file mode 100644
index 0000000000..8c07fd8957
--- /dev/null
+++ b/src/USER-OMP/reaxc_bonds_omp.h
@@ -0,0 +1,35 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __BONDS_OMP_H_
+#define __BONDS_OMP_H_
+
+#include "reaxc_types.h"
+
+void BondsOMP( reax_system*, control_params*, simulation_data*,
+            storage*, reax_list**, output_controls* );
+
+#endif
diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp
new file mode 100644
index 0000000000..1bf04129e0
--- /dev/null
+++ b/src/USER-OMP/reaxc_forces_omp.cpp
@@ -0,0 +1,637 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include "omp.h"
+#include "thr_data.h"
+
+#include "reaxc_forces_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_bonds_omp.h"
+#include "reaxc_hydrogen_bonds_omp.h"
+#include "reaxc_io_tools.h"
+#include "reaxc_list.h"
+#include "reaxc_lookup.h"
+#include "reaxc_multi_body_omp.h"
+#include "reaxc_nonbonded_omp.h"
+#include "reaxc_tool_box.h"
+#include "reaxc_torsion_angles_omp.h"
+#include "reaxc_valence_angles_omp.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+// Functions defined in reaxc_forces.cpp
+extern interaction_function Interaction_Functions[];
+extern double Compute_H(double, double, double*);
+extern double Compute_tabH(double, int, int);
+extern void Dummy_Interaction(reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls*);
+
+/* ---------------------------------------------------------------------- */
+
+void Init_Force_FunctionsOMP( control_params *control )
+{
+  Interaction_Functions[0] = BOOMP;
+  Interaction_Functions[1] = BondsOMP; //Dummy_Interaction;
+  Interaction_Functions[2] = Atom_EnergyOMP; //Dummy_Interaction;
+  Interaction_Functions[3] = Valence_AnglesOMP; //Dummy_Interaction;
+  Interaction_Functions[4] = Torsion_AnglesOMP; //Dummy_Interaction;
+  if( control->hbond_cut > 0 )
+    Interaction_Functions[5] = Hydrogen_BondsOMP;
+  else Interaction_Functions[5] = Dummy_Interaction;
+  Interaction_Functions[6] = Dummy_Interaction; //empty
+  Interaction_Functions[7] = Dummy_Interaction; //empty
+  Interaction_Functions[8] = Dummy_Interaction; //empty
+  Interaction_Functions[9] = Dummy_Interaction; //empty
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Only difference with MPI-only version is inclusion of OMP_TIMING statements
+void Compute_Bonded_ForcesOMP( reax_system *system, control_params *control,
+                            simulation_data *data, storage *workspace,
+                            reax_list **lists, output_controls *out_control,
+                            MPI_Comm comm )
+{
+  int i;
+  
+#ifdef OMP_TIMING
+  double startTimeBase, endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  /* Implement all force calls as function pointers */
+  for( i = 0; i < NUM_INTRS; i++ ) {
+    (Interaction_Functions[i])( system, control, data, workspace,
+                                lists, out_control );
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEBFINDEX] += (endTimeBase-startTimeBase);
+#endif
+
+}
+
+// Only difference with MPI-only version is inclusion of OMP_TIMING statements
+void Compute_NonBonded_ForcesOMP( reax_system *system, control_params *control,
+                               simulation_data *data, storage *workspace,
+                               reax_list **lists, output_controls *out_control,
+                               MPI_Comm comm )
+{
+  /* van der Waals and Coulomb interactions */
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  if( control->tabulate == 0 )
+    vdW_Coulomb_Energy_OMP( system, control, data, workspace,
+			    lists, out_control );
+  else
+    Tabulated_vdW_Coulomb_Energy_OMP( system, control, data, workspace,
+				      lists, out_control );
+  
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTENBFINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* this version of Compute_Total_Force computes forces from
+   coefficients accumulated by all interaction functions.
+   Saves enormous time & space! */
+void Compute_Total_ForceOMP( reax_system *system, control_params *control,
+                          simulation_data *data, storage *workspace,
+                          reax_list **lists, mpi_datatypes *mpi_data )
+{  
+#ifdef OMP_TIMING
+  double startTimeBase,endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  int natoms = system->N;
+  int nthreads = control->nthreads;
+  long totalReductionSize = system->N * nthreads;    
+  reax_list *bonds = (*lists) + BONDS;
+  
+#pragma omp parallel default(shared) //default(none)  
+  {
+    int i, j, k, pj, pk, start_j, end_j;
+    int tid = omp_get_thread_num();
+    bond_order_data *bo_jk;
+    
+    class PairReaxCOMP *pair_reax_ptr;
+    pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+    class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+    
+    pair_reax_ptr->ev_setup_thr_proxy(0, 1, natoms, system->pair_ptr->eatom, 
+ 				      system->pair_ptr->vatom, thr);
+    
+#pragma omp for schedule(guided)
+    for (i = 0; i < system->N; ++i) {
+      for (j = 0; j < nthreads; ++j)
+	workspace->CdDelta[i] += workspace->CdDeltaReduction[system->N*j+i];
+    }
+    
+#pragma omp for schedule(dynamic,50)
+    for (j = 0; j < system->N; ++j) {
+      start_j = Start_Index(j, bonds);
+      end_j = End_Index(j, bonds);
+      
+      for (pk = start_j; pk < end_j; ++pk) {
+ 	bo_jk = &( bonds->select.bond_list[pk].bo_data );
+ 	for (k = 0; k < nthreads; ++k)
+ 	  bo_jk->Cdbo += bo_jk->CdboReduction[k];
+      }
+    }
+    
+// #pragma omp for schedule(guided) //(dynamic,50)
+//     for (i = 0; i < system->N; ++i)
+//       for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj)
+//  	if (i < bonds->select.bond_list[pj].nbr) {
+//  	  if (control->virial == 0)
+// 	    Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists );
+// 	  else
+//  	    Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists );
+//  	}
+
+    if(control->virial == 0) {
+
+#pragma omp for schedule(dynamic,50)
+      for (i = 0; i < system->N; ++i) {
+	const int startj = Start_Index(i, bonds);
+	const int endj  = End_Index(i, bonds);
+	for (pj = startj; pj < endj; ++pj)
+	  if (i < bonds->select.bond_list[pj].nbr) 
+	    Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists );
+      }
+
+    } else {
+
+#pragma omp for schedule(dynamic,50)
+      for (i = 0; i < system->N; ++i) {
+	const int startj = Start_Index(i, bonds);
+	const int endj  = End_Index(i, bonds);
+	for (pj = startj; pj < endj; ++pj)
+	  if (i < bonds->select.bond_list[pj].nbr) 
+	    Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists );
+      }
+
+    } // if(virial == 0)
+
+    pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, 0, 1, thr);
+
+#pragma omp for schedule(guided)
+    for (i = 0; i < system->N; ++i) {
+      for (j = 0; j < nthreads; ++j)
+ 	rvec_Add( workspace->f[i], workspace->forceReduction[system->N*j+i] );
+    }
+
+    
+#pragma omp for schedule(guided) 
+    for (i = 0; i < totalReductionSize; i++) {
+      workspace->forceReduction[i][0] = 0;
+      workspace->forceReduction[i][1] = 0;
+      workspace->forceReduction[i][2] = 0;
+      workspace->CdDeltaReduction[i] = 0;
+    }
+  } // parallel region
+  
+  if (control->virial)
+    for (int i=0; i < nthreads; ++i) {
+      rvec_Add(data->my_ext_press, workspace->my_ext_pressReduction[i]);
+      workspace->my_ext_pressReduction[i][0] = 0;
+      workspace->my_ext_pressReduction[i][1] = 0;
+      workspace->my_ext_pressReduction[i][2] = 0;
+    }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTETFINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lists,
+                     int step, int n, int N, int numH, MPI_Comm comm )
+{
+  int i, comp, Hindex;
+  reax_list *bonds, *hbonds;
+  reallocate_data *realloc = &(workspace->realloc);
+  double saferzone = system->saferzone;
+
+#pragma omp parallel default(shared) private(i, comp, Hindex)
+  {
+  
+  /* bond list */
+  if( N > 0 ) {
+    bonds = *lists + BONDS;
+
+#pragma omp for schedule(guided)
+    for( i = 0; i < N; ++i ) {
+      system->my_atoms[i].num_bonds = MAX(Num_Entries(i,bonds)*2, MIN_BONDS);
+
+      if( i < N-1 )
+        comp = Start_Index(i+1, bonds);
+      else comp = bonds->num_intrs;
+
+      if( End_Index(i, bonds) > comp ) {
+        fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n",
+                 step, i, End_Index(i,bonds), comp );
+        MPI_Abort( comm, INSUFFICIENT_MEMORY );
+      }
+    }
+  }
+  
+  
+  /* hbonds list */
+  if( numH > 0 ) {
+    hbonds = *lists + HBONDS;
+
+#pragma omp for schedule(guided)
+    for( i = 0; i < n; ++i ) {
+      Hindex = system->my_atoms[i].Hindex;
+      if( Hindex > -1 ) {
+        system->my_atoms[i].num_hbonds =
+          (int)(MAX( Num_Entries(Hindex, hbonds)*saferzone, MIN_HBONDS ));
+
+        if( Hindex < numH-1 )
+          comp = Start_Index(Hindex+1, hbonds);
+        else comp = hbonds->num_intrs;
+
+        if( End_Index(Hindex, hbonds) > comp ) {
+          fprintf(stderr,"step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n",
+                  step, Hindex, End_Index(Hindex,hbonds), comp );
+          MPI_Abort( comm, INSUFFICIENT_MEMORY );
+        }
+      }
+    }
+  }
+  
+  } // omp parallel
+}
+
+
+void Init_Forces_noQEq_OMP( reax_system *system, control_params *control,
+			    simulation_data *data, storage *workspace,
+			    reax_list **lists, output_controls *out_control,
+			    MPI_Comm comm ) {
+#ifdef OMP_TIMING
+  double startTimeBase, endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  int i, j, pi, pj;
+  int start_i, end_i, start_j, end_j;
+  int type_i, type_j;
+  int ihb, jhb, ihb_top, jhb_top;
+  int local, flag;
+  double r_ij, cutoff;
+  single_body_parameters *sbp_i, *sbp_j;
+  two_body_parameters *twbp;
+  far_neighbor_data *nbr_pj;
+  reax_atom *atom_i, *atom_j;
+  bond_data *ibond, *jbond;
+  reax_list *far_nbrs = *lists + FAR_NBRS;
+  reax_list *bonds = *lists + BONDS;
+  reax_list *hbonds = *lists + HBONDS;
+  int num_bonds = 0;
+  int num_hbonds = 0;
+  int btop_i = 0;
+  int btop_j = 0;
+  int renbr = (data->step-data->prev_steps) % control->reneighbor == 0;
+
+  // We will use CdDeltaReduction as a temporary (double) buffer to accumulate total_bond_order
+  // This is safe because CdDeltaReduction is currently zeroed and its accumulation doesn't start until BondsOMP()
+  double * tmp_bond_order = workspace->CdDeltaReduction;
+
+  // We do the same with forceReduction as a temporary (rvec) buffer to accumulate dDeltap_self
+  // This is safe because forceReduction is currently zeroed and its accumulation does start until Hydrogen_BondsOMP()
+  rvec * tmp_ddelta = workspace->forceReduction;
+
+  /* uncorrected bond orders */
+  cutoff = control->bond_cut;
+  
+#pragma omp parallel default(shared)					\
+  private(i, atom_i, type_i, pi, start_i, end_i, sbp_i, btop_i, ibond, ihb, ihb_top, \
+	  j, atom_j, type_j, pj, start_j, end_j, sbp_j, nbr_pj, jbond, jhb, twbp)
+  {
+
+    int nthreads = control->nthreads;
+    int tid = omp_get_thread_num();
+    long reductionOffset = system->N * tid;
+    long totalReductionSize = system->N * nthreads;
+
+#pragma omp for schedule(dynamic,50) reduction(+ : num_bonds)
+//#pragma omp for schedule(guided) reduction(+ : num_bonds)
+  for (i = 0; i < system->N; ++i) {
+    atom_i = &(system->my_atoms[i]);
+    type_i  = atom_i->type;
+    sbp_i = &(system->reax_param.sbp[type_i]);
+
+    start_i = Start_Index(i, far_nbrs);
+    end_i   = End_Index(i, far_nbrs);
+
+    for( pj = start_i; pj < end_i; ++pj ) {
+      nbr_pj = &( far_nbrs->select.far_nbr_list[pj] );
+      if (nbr_pj->d <= cutoff) {
+	j = nbr_pj->nbr;
+	atom_j = &(system->my_atoms[j]);
+	type_j = atom_j->type;
+	sbp_j = &(system->reax_param.sbp[type_j]);
+	twbp = &(system->reax_param.tbp[type_i][type_j]);
+
+// #pragma omp critical
+// 	{
+// 	  btop_i = End_Index(i, bonds);
+// 	  if( BOp(workspace, bonds, control->bo_cut, i, btop_i, nbr_pj, sbp_i, sbp_j, twbp) ) {
+//             num_bonds++;
+//             btop_i++;
+//             Set_End_Index(i, btop_i, bonds);
+// 	  }
+	  
+// 	}
+	
+	// Trying to minimize time spent in critical section by moving initial part of BOp()
+	// outside of critical section.
+
+	// Start top portion of BOp()
+	int jj = nbr_pj->nbr;
+	double C12, C34, C56;
+	double BO, BO_s, BO_pi, BO_pi2;
+	double bo_cut = control->bo_cut;
+
+	if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) {
+	  C12 = twbp->p_bo1 * pow( nbr_pj->d / twbp->r_s, twbp->p_bo2 );
+	  BO_s = (1.0 + bo_cut) * exp( C12 );
+	}
+	else BO_s = C12 = 0.0;
+	
+	if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) {
+	  C34 = twbp->p_bo3 * pow( nbr_pj->d / twbp->r_p, twbp->p_bo4 );
+	  BO_pi = exp( C34 );
+	}
+	else BO_pi = C34 = 0.0;
+	
+	if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) {
+	  C56 = twbp->p_bo5 * pow( nbr_pj->d / twbp->r_pp, twbp->p_bo6 );
+	  BO_pi2= exp( C56 );
+	}
+	else BO_pi2 = C56 = 0.0;
+	
+	/* Initially BO values are the uncorrected ones, page 1 */
+	BO = BO_s + BO_pi + BO_pi2;
+	// End top portion of BOp()
+	
+	if(BO >= bo_cut) {
+	  int btop_j;
+	  
+	  // Update indices in critical section
+#pragma omp critical
+ 	  {
+	    btop_i = End_Index( i, bonds );
+	    btop_j = End_Index( j, bonds );
+	    Set_End_Index( j, btop_j+1, bonds );
+	    Set_End_Index( i, btop_i+1, bonds );
+ 	  } // omp critical
+	  
+	  // Finish remaining BOp() work
+	  BOp_OMP(workspace, bonds, bo_cut,
+		  i , btop_i, nbr_pj, sbp_i, sbp_j, twbp, btop_j,
+		  C12, C34, C56, BO, BO_s, BO_pi, BO_pi2);
+	  
+ 	  bond_data * ibond = &(bonds->select.bond_list[btop_i]);
+ 	  bond_order_data * bo_ij = &(ibond->bo_data);
+	  
+ 	  bond_data * jbond = &(bonds->select.bond_list[btop_j]);
+ 	  bond_order_data * bo_ji = &(jbond->bo_data);
+	  
+	  workspace->total_bond_order[i]      += bo_ij->BO;
+	  tmp_bond_order[reductionOffset + j] += bo_ji->BO;
+	  
+	  rvec_Add(workspace->dDeltap_self[i],      bo_ij->dBOp);
+	  rvec_Add(tmp_ddelta[reductionOffset + j], bo_ji->dBOp);
+	  
+	  btop_i++;
+	  num_bonds++;
+	} // if(BO>=bo_cut)
+	
+      } // if(cutoff)
+            
+    } // for(pj)
+  } // for(i)
+
+  // Need to wait for all indices and tmp arrays accumulated.
+#pragma omp barrier
+
+#pragma omp for schedule(dynamic,50)
+  for(i=0; i<system->N; i++) 
+    for(int t=0; t<nthreads; t++) {
+      const int indx = t*system->N + i;
+      workspace->dDeltap_self[i][0]  += tmp_ddelta[indx][0];
+      workspace->dDeltap_self[i][1]  += tmp_ddelta[indx][1];
+      workspace->dDeltap_self[i][2]  += tmp_ddelta[indx][2];
+      workspace->total_bond_order[i] += tmp_bond_order[indx];
+    }
+
+  // Not needed anymore with newest BOp_OMP()?
+  //#pragma omp for schedule(guided)
+// #pragma omp for schedule(dynamic,50)
+//   for (i = 0; i < system->N; ++i) {
+//     start_i = Start_Index(i, bonds);
+//     end_i   = End_Index(i, bonds);
+    
+//     for (pi = start_i; pi < end_i; ++pi) {
+//       ibond = &(bonds->select.bond_list[pi]);
+//       j = ibond->nbr;
+      
+//       if (i < j) {
+//  	start_j = Start_Index(j, bonds);
+//  	end_j   = End_Index(j, bonds);
+	
+//  	for (pj = start_j; pj < end_j; ++pj) {
+//  	  jbond = &(bonds->select.bond_list[pj]);
+	  
+//  	  if (jbond->nbr == i) {
+//  	    ibond->sym_index = pj;
+//  	    jbond->sym_index = pi;
+//  	    break;
+//  	  }
+//  	}
+//       }
+//     }
+//    }
+  
+  /* hydrogen bond list */
+  if (control->hbond_cut > 0) {
+    cutoff = control->hbond_cut;
+
+//#pragma omp for schedule(guided) reduction(+ : num_hbonds)    
+#pragma omp for schedule(dynamic,50) reduction(+ : num_hbonds)    
+     for (i = 0; i < system->n; ++i) {
+       atom_i = &(system->my_atoms[i]);
+       type_i  = atom_i->type;
+       sbp_i = &(system->reax_param.sbp[type_i]);
+       ihb = sbp_i->p_hbond;
+
+#pragma omp critical
+       {
+       
+       if (ihb == 1 || ihb == 2) {
+	 start_i = Start_Index(i, far_nbrs);
+	 end_i   = End_Index(i, far_nbrs);
+	 
+	 for (pj = start_i; pj < end_i; ++pj) {
+	   nbr_pj = &( far_nbrs->select.far_nbr_list[pj] );
+	   j = nbr_pj->nbr;
+	   atom_j = &(system->my_atoms[j]);
+	   type_j = atom_j->type;
+	   if(type_j < 0) continue;
+	   sbp_j = &(system->reax_param.sbp[type_j]);
+	   jhb = sbp_j->p_hbond;
+
+	   if (nbr_pj->d <= control->hbond_cut) {
+	     int iflag = 0;
+	     int jflag = 0;
+	     
+	     if(ihb==1 && jhb==2) iflag = 1;
+	     else if(j<system->n && ihb == 2 && jhb == 1) jflag = 1;
+	     
+	     if(iflag || jflag) {
+	     
+	       // This critical section enforces H-bonds to be added by threads one at a time.
+// #pragma omp critical
+// 	       {
+		 if(iflag) {
+		   ihb_top = End_Index(atom_i->Hindex, hbonds);
+		   Set_End_Index(atom_i->Hindex, ihb_top+1, hbonds);
+		 } else if(jflag) {
+		   jhb_top = End_Index(atom_j->Hindex, hbonds);
+		   Set_End_Index(atom_j->Hindex, jhb_top+1, hbonds);
+		 }
+	       // } // omp critical
+
+	       if(iflag) {
+		 hbonds->select.hbond_list[ihb_top].nbr = j;
+		 hbonds->select.hbond_list[ihb_top].scl = 1;
+		 hbonds->select.hbond_list[ihb_top].ptr = nbr_pj;
+	       } else if(jflag) {
+		 hbonds->select.hbond_list[jhb_top].nbr = i;
+		 hbonds->select.hbond_list[jhb_top].scl = -1;
+		 hbonds->select.hbond_list[jhb_top].ptr = nbr_pj;
+	       }
+	       
+	       num_hbonds++;
+	     } // if(iflag || jflag)
+
+	   }
+	 }
+       }
+       
+       } // omp critical
+     }
+
+  } // if(control->hbond > 0)
+
+  // Zero buffers for others to use as intended.
+#pragma omp for schedule(guided)
+  for(i=0; i<totalReductionSize; i++) {
+    tmp_ddelta[i][0]  = 0.0;
+    tmp_ddelta[i][1]  = 0.0;
+    tmp_ddelta[i][2]  = 0.0;
+    tmp_bond_order[i] = 0.0;
+  }
+
+  } // omp
+
+  workspace->realloc.num_bonds = num_bonds;
+  workspace->realloc.num_hbonds = num_hbonds;
+
+  Validate_ListsOMP( system, workspace, lists, data->step,
+                  system->n, system->N, system->numH, comm );
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEIFINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Compute_ForcesOMP( reax_system *system, control_params *control,
+                     simulation_data *data, storage *workspace,
+                     reax_list **lists, output_controls *out_control,
+                     mpi_datatypes *mpi_data )
+{
+  int qeq_flag;
+  MPI_Comm comm = mpi_data->world;
+  
+  // Init Forces  
+#if defined(LOG_PERFORMANCE)
+  double t_start = 0;
+  if( system->my_rank == MASTER_NODE )
+    t_start = Get_Time( );
+#endif
+
+  Init_Forces_noQEq_OMP( system, control, data, workspace,
+			 lists, out_control, comm );
+  
+#if defined(LOG_PERFORMANCE)
+  //MPI_Barrier( comm );
+  if( system->my_rank == MASTER_NODE )
+    Update_Timing_Info( &t_start, &(data->timing.init_forces) );
+#endif
+  
+  // Bonded Interactions
+  Compute_Bonded_ForcesOMP( system, control, data, workspace,
+                         lists, out_control, mpi_data->world );
+
+#if defined(LOG_PERFORMANCE)
+  if( system->my_rank == MASTER_NODE )
+    Update_Timing_Info( &t_start, &(data->timing.bonded) );
+#endif
+  
+  // Nonbonded Interactions
+  Compute_NonBonded_ForcesOMP( system, control, data, workspace,
+                            lists, out_control, mpi_data->world );
+  
+#if defined(LOG_PERFORMANCE)
+  if( system->my_rank == MASTER_NODE )
+    Update_Timing_Info( &t_start, &(data->timing.nonb) );
+#endif
+  
+  // Total Force
+  Compute_Total_ForceOMP( system, control, data, workspace, lists, mpi_data );
+  
+#if defined(LOG_PERFORMANCE)
+  if( system->my_rank == MASTER_NODE )
+    Update_Timing_Info( &t_start, &(data->timing.bonded) );
+#endif
+}
diff --git a/src/USER-OMP/reaxc_forces_omp.h b/src/USER-OMP/reaxc_forces_omp.h
new file mode 100644
index 0000000000..f4941fc7fa
--- /dev/null
+++ b/src/USER-OMP/reaxc_forces_omp.h
@@ -0,0 +1,36 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __FORCES_OMP_H_
+#define __FORCES_OMP_H_
+
+#include "reaxc_types.h"
+#include "reaxc_defs.h"
+
+void Init_Force_FunctionsOMP( control_params* );
+void Compute_ForcesOMP( reax_system*, control_params*, simulation_data*,
+                     storage*, reax_list**, output_controls*, mpi_datatypes* );
+#endif
diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp
new file mode 100644
index 0000000000..5047e2775e
--- /dev/null
+++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp
@@ -0,0 +1,242 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include  <omp.h>
+
+#include "reaxc_hydrogen_bonds_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_valence_angles.h"     // To access Calculate_Theta()
+#include "reaxc_valence_angles_omp.h" // To access Calculate_dCos_ThetaOMP()
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void Hydrogen_BondsOMP( reax_system *system, control_params *control,
+                     simulation_data *data, storage *workspace,
+                     reax_list **lists, output_controls *out_control )
+{
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  const int nthreads = control->nthreads;
+  long totalReductionSize = system->N;
+  
+#pragma omp parallel default(shared) //default(none)
+ {
+  int  i, j, k, pi, pk;
+  int  type_i, type_j, type_k;
+  int  start_j, end_j, hb_start_j, hb_end_j;
+  int  hblist[MAX_BONDS];
+  int  itr, top;
+  int  num_hb_intrs = 0;
+  ivec rel_jk;
+  double r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
+  double e_hb, e_hb_thr = 0.0, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
+  rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
+  rvec dvec_jk, force, ext_press;
+  hbond_parameters *hbp;
+  bond_order_data *bo_ij;
+  bond_data *pbond_ij;
+  far_neighbor_data *nbr_jk;
+  reax_list *bonds, *hbonds;
+  bond_data *bond_list;
+  hbond_data *hbond_list;
+
+  // tally variables
+  double fi_tmp[3], fk_tmp[3], delij[3], delkj[3];
+
+  bonds = (*lists) + BONDS;
+  bond_list = bonds->select.bond_list;
+  hbonds = (*lists) + HBONDS;
+  hbond_list = hbonds->select.hbond_list;
+
+  int natoms = system->n;
+  int tid = omp_get_thread_num();
+  const int idelta = 1 + natoms/nthreads;
+  int ifrom = tid*idelta;
+  int ito   = ((ifrom + idelta) > natoms) ? natoms : ifrom + idelta;
+
+  long reductionOffset = (system->N * tid);
+
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, 
+				    system->pair_ptr->vflag_either, 
+				    natoms, system->pair_ptr->eatom, 
+				    system->pair_ptr->vatom, thr);
+  
+  /* loops below discover the Hydrogen bonds between i-j-k triplets.
+     here j is H atom and there has to be some bond between i and j.
+     Hydrogen bond is between j and k.
+     so in this function i->X, j->H, k->Z when we map
+     variables onto the ones in the handout.*/
+  //  for( j = 0; j < system->n; ++j )
+  for( j = ifrom; j < ito; ++j ) {
+    /* j has to be of type H */
+    if( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) {
+      /*set j's variables */
+      type_j     = system->my_atoms[j].type;
+      start_j    = Start_Index(j, bonds);
+      end_j      = End_Index(j, bonds);
+      hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds );
+      hb_end_j   = End_Index( system->my_atoms[j].Hindex, hbonds );
+      if(type_j < 0) continue;
+      
+      top = 0;
+      for( pi = start_j; pi < end_j; ++pi )  {
+        pbond_ij = &( bond_list[pi] );
+        i = pbond_ij->nbr;
+        type_i = system->my_atoms[i].type;
+	if(type_i < 0) continue;
+        bo_ij = &(pbond_ij->bo_data);
+
+        if( system->reax_param.sbp[type_i].p_hbond == 2 &&
+            bo_ij->BO >= HB_THRESHOLD )
+          hblist[top++] = pi;
+      }
+
+      for( pk = hb_start_j; pk < hb_end_j; ++pk ) {
+        /* set k's varibles */
+        k = hbond_list[pk].nbr;
+        type_k = system->my_atoms[k].type;
+	if(type_k < 0) continue;
+        nbr_jk = hbond_list[pk].ptr;
+        r_jk = nbr_jk->d;
+        rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec );
+
+        for( itr = 0; itr < top; ++itr ) {
+          pi = hblist[itr];
+          pbond_ij = &( bonds->select.bond_list[pi] );
+          i = pbond_ij->nbr;
+
+          if( system->my_atoms[i].orig_id != system->my_atoms[k].orig_id ) {
+            bo_ij = &(pbond_ij->bo_data);
+            type_i = system->my_atoms[i].type;
+	    if(type_i < 0) continue;
+            hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]);
+            ++num_hb_intrs;
+
+            Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
+                             &theta, &cos_theta );
+            /* the derivative of cos(theta) */
+            Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
+				     &dcos_theta_di, &dcos_theta_dj,
+				     &dcos_theta_dk );
+
+            /* hydrogen bond energy*/
+            sin_theta2 = sin( theta/2.0 );
+            sin_xhz4 = SQR(sin_theta2);
+            sin_xhz4 *= sin_xhz4;
+            cos_xhz1 = ( 1.0 - cos_theta );
+            exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO );
+            exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk +
+                                           r_jk / hbp->r0_hb - 2.0 ) );
+
+            e_hb_thr += e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4;
+
+            CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4;
+            CEhb2 = -hbp->p_hb1/2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1;
+            CEhb3 = -hbp->p_hb3 *
+              (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb;
+
+            /* hydrogen bond forces */
+            bo_ij->Cdbo += CEhb1; // dbo term
+
+            if( control->virial == 0 ) {
+	      // dcos terms
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+i], +CEhb2, dcos_theta_di );
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, dcos_theta_dj );
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb2, dcos_theta_dk );
+              // dr terms
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], -CEhb3/r_jk, dvec_jk );
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb3/r_jk, dvec_jk );
+            }
+            else {
+              /* for pressure coupling, terms that are not related to bond order
+                 derivatives are added directly into pressure vector/tensor */
+              rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms
+	      rvec_Add(workspace->forceReduction[reductionOffset+i], force );
+              rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
+              rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press );
+
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, dcos_theta_dj );
+
+              ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box );
+              rvec_Scale( force, +CEhb2, dcos_theta_dk );
+              rvec_Add(workspace->forceReduction[reductionOffset+k], force );
+              rvec_iMultiply( ext_press, rel_jk, force );
+              rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press );
+              // dr terms
+              rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j],-CEhb3/r_jk, dvec_jk );
+
+              rvec_Scale( force, CEhb3/r_jk, dvec_jk );
+              rvec_Add(workspace->forceReduction[reductionOffset+k], force );
+              rvec_iMultiply( ext_press, rel_jk, force );
+              rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press );
+            }
+
+            /* tally into per-atom virials */
+            if (system->pair_ptr->vflag_atom || system->pair_ptr->evflag) {
+              rvec_ScaledSum( delij, 1., system->my_atoms[j].x,
+                                    -1., system->my_atoms[i].x );
+              rvec_ScaledSum( delkj, 1., system->my_atoms[j].x,
+                                     -1., system->my_atoms[k].x );
+
+              rvec_Scale(fi_tmp, CEhb2, dcos_theta_di);
+              rvec_Scale(fk_tmp, CEhb2, dcos_theta_dk);
+              rvec_ScaledAdd(fk_tmp, CEhb3/r_jk, dvec_jk);
+
+              pair_reax_ptr->ev_tally3_thr_proxy(system->pair_ptr,i,j,k,e_hb,0.0,fi_tmp,fk_tmp,delij,delkj,thr);
+	    }
+          }
+        }
+      }
+
+    }
+  }
+#pragma omp critical
+  {
+    data->my_en.e_hb += e_hb_thr;
+  }
+  
+  pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, 
+				  system->pair_ptr->vflag_either, thr);
+}
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEHBONDSINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.h b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h
new file mode 100644
index 0000000000..b4a2d78dbb
--- /dev/null
+++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h
@@ -0,0 +1,35 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __HBONDS_OMP_H_
+#define __HBONDS_OMP_H_
+
+#include "reaxc_types.h"
+
+void Hydrogen_BondsOMP( reax_system*, control_params*, simulation_data*,
+                     storage*, reax_list**, output_controls* );
+
+#endif
diff --git a/src/USER-OMP/reaxc_init_md_omp.cpp b/src/USER-OMP/reaxc_init_md_omp.cpp
new file mode 100644
index 0000000000..1584d5e53e
--- /dev/null
+++ b/src/USER-OMP/reaxc_init_md_omp.cpp
@@ -0,0 +1,188 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include "reaxc_init_md_omp.h"
+#include "reaxc_allocate.h"
+#include "reaxc_forces.h"
+#include "reaxc_forces_omp.h"
+#include "reaxc_io_tools.h"
+#include "reaxc_list.h"
+#include "reaxc_lookup.h"
+#include "reaxc_reset_tools.h"
+#include "reaxc_system_props.h"
+#include "reaxc_tool_box.h"
+#include "reaxc_vector.h"
+#include "omp.h"
+
+// Functions definedd in reaxc_init_md.cpp
+extern int Init_MPI_Datatypes(reax_system*, storage*, mpi_datatypes*, MPI_Comm, char*);
+extern int Init_System(reax_system*, control_params*, char*);
+extern int Init_Simulation_Data(reax_system*, control_params*, simulation_data*, char*);
+extern int Init_Workspace(reax_system*, control_params*, storage*, MPI_Comm, char*);
+
+/* ---------------------------------------------------------------------- */
+
+int Init_ListsOMP( reax_system *system, control_params *control,
+                 simulation_data *data, storage *workspace, reax_list **lists,
+                 mpi_datatypes *mpi_data, char *msg )
+{
+  int i, total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop;
+  int *hb_top, *bond_top;
+  MPI_Comm comm;
+  
+  int TWICE = 2;
+  int mincap = system->mincap;
+  double safezone = system->safezone;
+  double saferzone = system->saferzone;
+
+  comm = mpi_data->world;
+  bond_top = (int*) calloc( system->total_cap, sizeof(int) );
+  hb_top = (int*) calloc( system->local_cap, sizeof(int) );
+  Estimate_Storages( system, control, lists,
+                     &Htop, hb_top, bond_top, &num_3body, comm );
+
+  if( control->hbond_cut > 0 ) {
+    /* init H indexes */
+    total_hbonds = 0;
+    for( i = 0; i < system->n; ++i ) {
+      system->my_atoms[i].num_hbonds = hb_top[i];
+      total_hbonds += hb_top[i];
+    }
+    total_hbonds = (int)(MAX( total_hbonds*saferzone, mincap*MIN_HBONDS ));
+
+    if( !Make_List( system->Hcap, total_hbonds, TYP_HBOND,
+                    *lists+HBONDS, comm ) ) {
+      fprintf( stderr, "not enough space for hbonds list. terminating!\n" );
+      MPI_Abort( comm, INSUFFICIENT_MEMORY );
+    }
+  }
+
+  total_bonds = 0;
+  for( i = 0; i < system->N; ++i ) {
+    system->my_atoms[i].num_bonds = bond_top[i];
+    total_bonds += bond_top[i];
+  }
+  bond_cap = (int)(MAX( total_bonds*safezone, mincap*MIN_BONDS ));
+
+  if( !Make_List( system->total_cap, bond_cap, TYP_BOND,
+                  *lists+BONDS, comm ) ) {
+    fprintf( stderr, "not enough space for bonds list. terminating!\n" );
+    MPI_Abort( comm, INSUFFICIENT_MEMORY );
+  }
+
+  int nthreads = control->nthreads;
+  reax_list *bonds = (*lists)+BONDS;
+  
+  for (i = 0; i < bonds->num_intrs; ++i)
+    bonds->select.bond_list[i].bo_data.CdboReduction = 
+      (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm);
+
+  /* 3bodies list */
+  cap_3body = (int)(MAX( num_3body*safezone, MIN_3BODIES ));
+  if( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY,
+		  *lists+THREE_BODIES, comm ) ){
+
+    fprintf( stderr, "Problem in initializing angles list. Terminating!\n" );
+    MPI_Abort( comm, INSUFFICIENT_MEMORY );
+  }
+
+  free( hb_top );
+  free( bond_top );
+
+  return SUCCESS;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// The only difference with the MPI-only function is calls to Init_ListsOMP and Init_Force_FunctionsOMP().
+void InitializeOMP( reax_system *system, control_params *control,
+                 simulation_data *data, storage *workspace,
+                 reax_list **lists, output_controls *out_control,
+                 mpi_datatypes *mpi_data, MPI_Comm comm )
+{
+  char msg[MAX_STR];
+
+
+  if( Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE ) {
+    fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n",
+             system->my_rank );
+    fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n",
+             system->my_rank );
+    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+  }
+
+  if( Init_System(system, control, msg) == FAILURE ){
+    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
+    fprintf( stderr, "p%d: system could not be initialized! terminating.\n",
+             system->my_rank );
+    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+  }
+
+  if( Init_Simulation_Data( system, control, data, msg ) == FAILURE ) {
+    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
+    fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n",
+             system->my_rank );
+    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+  }
+
+  if( Init_Workspace( system, control, workspace, mpi_data->world, msg ) ==
+      FAILURE ) {
+    fprintf( stderr, "p%d:init_workspace: not enough memory\n",
+             system->my_rank );
+    fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n",
+             system->my_rank );
+    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+  }
+
+  if( Init_ListsOMP( system, control, data, workspace, lists, mpi_data, msg ) ==
+      FAILURE ) {
+      fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
+      fprintf( stderr, "p%d: system could not be initialized! terminating.\n",
+               system->my_rank );
+      MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+    }
+
+  if( Init_Output_Files(system,control,out_control,mpi_data,msg)== FAILURE) {
+    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
+    fprintf( stderr, "p%d: could not open output files! terminating...\n",
+             system->my_rank );
+    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+  }
+
+  if( control->tabulate ) {
+    if( Init_Lookup_Tables( system, control, workspace, mpi_data, msg ) == FAILURE ) {
+      fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
+      fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n",
+               system->my_rank );
+      MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
+    }
+  }
+
+
+  Init_Force_FunctionsOMP( control );
+}
+
diff --git a/src/USER-OMP/reaxc_init_md_omp.h b/src/USER-OMP/reaxc_init_md_omp.h
new file mode 100644
index 0000000000..bb4e9c7061
--- /dev/null
+++ b/src/USER-OMP/reaxc_init_md_omp.h
@@ -0,0 +1,34 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __INIT_MD_OMP_H_
+#define __INIT_MD_OMP_H_
+
+#include "reaxc_types.h"
+
+void InitializeOMP( reax_system*, control_params*, simulation_data*, storage*,
+                 reax_list**, output_controls*, mpi_datatypes*, MPI_Comm );
+#endif
diff --git a/src/USER-OMP/reaxc_multi_body_omp.cpp b/src/USER-OMP/reaxc_multi_body_omp.cpp
new file mode 100644
index 0000000000..a355ce8609
--- /dev/null
+++ b/src/USER-OMP/reaxc_multi_body_omp.cpp
@@ -0,0 +1,285 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include  <omp.h>
+#include "thr_data.h"
+
+#include "reaxc_multi_body_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void Atom_EnergyOMP( reax_system *system, control_params *control,
+                  simulation_data *data, storage *workspace, reax_list **lists,
+                  output_controls *out_control )
+{
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  /* Initialize parameters */
+  double p_lp1 = system->reax_param.gp.l[15];
+  double p_lp3 = system->reax_param.gp.l[5];
+  double p_ovun3 = system->reax_param.gp.l[32];
+  double p_ovun4 = system->reax_param.gp.l[31];
+  double p_ovun6 = system->reax_param.gp.l[6];
+  double p_ovun7 = system->reax_param.gp.l[8];
+  double p_ovun8 = system->reax_param.gp.l[9];
+
+  int natoms = system->n;
+  int nthreads = control->nthreads;
+  reax_list *bonds = (*lists) + BONDS;
+  
+  double total_Elp = 0.0;
+  double total_Eun = 0.0;
+  double total_Eov = 0.0;
+  
+#pragma omp parallel default(shared) reduction(+:total_Elp, total_Eun, total_Eov)
+{
+  int i, j, pj, type_i, type_j;
+  double Delta_lpcorr, dfvl;
+  double e_lp, expvd2, inv_expvd2, dElp, CElp, DlpVi;
+  double e_lph, Di, vov3, deahu2dbo, deahu2dsbo;
+  double e_ov, CEover1, CEover2, CEover3, CEover4;
+  double exp_ovun1, exp_ovun2, sum_ovun1, sum_ovun2;
+  double exp_ovun2n, exp_ovun6, exp_ovun8;
+  double inv_exp_ovun1, inv_exp_ovun2, inv_exp_ovun2n, inv_exp_ovun8;
+  double e_un, CEunder1, CEunder2, CEunder3, CEunder4;
+  double eng_tmp, f_tmp;
+  double p_lp2, p_ovun2, p_ovun5;
+  int numbonds;
+  
+  single_body_parameters *sbp_i, *sbp_j;
+  two_body_parameters *twbp;
+  bond_data *pbond;
+  bond_order_data *bo_ij;
+  
+  int tid = omp_get_thread_num();
+  
+  long reductionOffset = (system->N * tid);
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+  
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, 
+				    system->pair_ptr->vflag_either, natoms, 
+				    system->pair_ptr->eatom, system->pair_ptr->vatom, thr);
+  
+#pragma omp for schedule(guided)
+  for ( i = 0; i < system->n; ++i) {
+    type_i = system->my_atoms[i].type;
+    if(type_i < 0) continue;
+    sbp_i = &(system->reax_param.sbp[ type_i ]);
+    
+    /* lone-pair Energy */
+    p_lp2 = sbp_i->p_lp2;
+    expvd2 = exp( -75 * workspace->Delta_lp[i] );
+    inv_expvd2 = 1. / (1. + expvd2 );
+
+    numbonds = 0;
+    e_lp = 0.0;
+    for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj )
+      numbonds ++;
+
+    /* calculate the energy */
+    if(numbonds > 0) 
+      total_Elp += e_lp =
+	p_lp2 * workspace->Delta_lp[i] * inv_expvd2;
+    
+    dElp = p_lp2 * inv_expvd2 +
+      75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2);
+    CElp = dElp * workspace->dDelta_lp[i];
+    
+    if(numbonds > 0) workspace->CdDelta[i] += CElp;  // lp - 1st term
+    
+    /* tally into per-atom energy */
+    if( system->pair_ptr->evflag)
+      pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, 
+					e_lp, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+    
+    /* correction for C2 */
+    if( p_lp3 > 0.001 && !strcmp(system->reax_param.sbp[type_i].name, "C") )
+      for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) {
+        j = bonds->select.bond_list[pj].nbr;
+        type_j = system->my_atoms[j].type;
+	if(type_j < 0) continue;
+
+        if( !strcmp( system->reax_param.sbp[type_j].name, "C" ) ) {
+          twbp = &( system->reax_param.tbp[type_i][type_j]);
+          bo_ij = &( bonds->select.bond_list[pj].bo_data );
+          Di = workspace->Delta[i];
+          vov3 = bo_ij->BO - Di - 0.040*pow(Di, 4.);
+	  
+          if( vov3 > 3. ) {
+	    total_Elp += e_lph = p_lp3 * SQR(vov3-3.0);
+
+            deahu2dbo = 2.*p_lp3*(vov3 - 3.);
+            deahu2dsbo = 2.*p_lp3*(vov3 - 3.)*(-1. - 0.16*pow(Di, 3.));
+	    
+            bo_ij->Cdbo += deahu2dbo;
+            workspace->CdDelta[i] += deahu2dsbo;
+	    
+            /* tally into per-atom energy */
+            if( system->pair_ptr->evflag)
+              pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, system->n, 1, 
+						e_lph, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+          }
+        }
+      }
+  }
+#pragma omp barrier
+
+#pragma omp for schedule(guided)
+  for (i = 0; i < system->n; ++i) {
+    type_i = system->my_atoms[i].type;
+    if(type_i < 0) continue;
+    sbp_i = &(system->reax_param.sbp[ type_i ]);
+    
+    /* over-coordination energy */
+    if( sbp_i->mass > 21.0 )
+      dfvl = 0.0;
+    else dfvl = 1.0; // only for 1st-row elements
+    
+    p_ovun2 = sbp_i->p_ovun2;
+    sum_ovun1 = sum_ovun2 = 0;
+    for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) {
+      j = bonds->select.bond_list[pj].nbr;
+      type_j = system->my_atoms[j].type;
+      if(type_j < 0) continue;
+      bo_ij = &(bonds->select.bond_list[pj].bo_data);
+      twbp = &(system->reax_param.tbp[ type_i ][ type_j ]);
+      
+      sum_ovun1 += twbp->p_ovun1 * twbp->De_s * bo_ij->BO;
+      sum_ovun2 += (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j])*
+	( bo_ij->BO_pi + bo_ij->BO_pi2 );
+    }
+    
+    exp_ovun1 = p_ovun3 * exp( p_ovun4 * sum_ovun2 );
+    inv_exp_ovun1 = 1.0 / (1 + exp_ovun1);
+    Delta_lpcorr  = workspace->Delta[i] -
+      (dfvl * workspace->Delta_lp_temp[i]) * inv_exp_ovun1;
+    
+    exp_ovun2 = exp( p_ovun2 * Delta_lpcorr );
+    inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2);
+    
+    DlpVi = 1.0 / (Delta_lpcorr + sbp_i->valency + 1e-8);
+    CEover1 = Delta_lpcorr * DlpVi * inv_exp_ovun2;
+
+    total_Eov += e_ov = sum_ovun1 * CEover1;
+
+    CEover2 = sum_ovun1 * DlpVi * inv_exp_ovun2 *
+      (1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 ));
+    
+    CEover3 = CEover2 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1 );
+    
+    CEover4 = CEover2 * (dfvl * workspace->Delta_lp_temp[i]) *
+      p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1);
+    
+    
+    /* under-coordination potential */
+    p_ovun2 = sbp_i->p_ovun2;
+    p_ovun5 = sbp_i->p_ovun5;
+    
+    exp_ovun2n = 1.0 / exp_ovun2;
+    exp_ovun6 = exp( p_ovun6 * Delta_lpcorr );
+    exp_ovun8 = p_ovun7 * exp(p_ovun8 * sum_ovun2);
+    inv_exp_ovun2n = 1.0 / (1.0 + exp_ovun2n);
+    inv_exp_ovun8 = 1.0 / (1.0 + exp_ovun8);
+
+    numbonds = 0;
+    e_un = 0.0;
+    for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj )
+      numbonds ++;
+
+    if(numbonds > 0) total_Eun += e_un =
+		       -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8;
+    
+    CEunder1 = inv_exp_ovun2n *
+      ( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 +
+        p_ovun2 * e_un * exp_ovun2n );
+    CEunder2 = -e_un * p_ovun8 * exp_ovun8 * inv_exp_ovun8;
+    CEunder3 = CEunder1 * (1.0 - dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1);
+    CEunder4 = CEunder1 * (dfvl*workspace->Delta_lp_temp[i]) *
+      p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1) + CEunder2;
+    
+    /* tally into per-atom energy */
+    if (system->pair_ptr->evflag) {
+      eng_tmp = e_ov;
+      if(numbonds > 0) eng_tmp+= e_un;
+      pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, 
+					eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+    }
+    
+    /* forces */
+    workspace->CdDelta[i] += CEover3;   // OvCoor - 2nd term
+    if(numbonds > 0) workspace->CdDelta[i] += CEunder3;  // UnCoor - 1st term
+
+    for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) {
+      pbond = &(bonds->select.bond_list[pj]);
+      j = pbond->nbr;
+      bo_ij = &(pbond->bo_data);
+      twbp  = &(system->reax_param.tbp[ system->my_atoms[i].type ]
+                [system->my_atoms[pbond->nbr].type]);
+      
+      bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor-1st
+      workspace->CdDeltaReduction[reductionOffset+j] += 
+	CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor-3a
+      
+      bo_ij->Cdbopi += CEover4 *
+        (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b
+      bo_ij->Cdbopi2 += CEover4 *
+        (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);  // OvCoor-3b
+      
+      workspace->CdDeltaReduction[reductionOffset+j] += 
+	CEunder4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2);   // UnCoor - 2a
+      
+      bo_ij->Cdbopi += CEunder4 *
+        (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);  // UnCoor-2b
+      bo_ij->Cdbopi2 += CEunder4 *
+        (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);  // UnCoor-2b
+    }
+  }
+  
+  pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, 
+				  system->pair_ptr->vflag_either, thr);
+  
+ }
+ 
+ data->my_en.e_lp += total_Elp;
+ data->my_en.e_ov += total_Eov;
+ data->my_en.e_un += total_Eun;
+ 
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEATOMENERGYINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
diff --git a/src/USER-OMP/reaxc_multi_body_omp.h b/src/USER-OMP/reaxc_multi_body_omp.h
new file mode 100644
index 0000000000..b0746569ca
--- /dev/null
+++ b/src/USER-OMP/reaxc_multi_body_omp.h
@@ -0,0 +1,35 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __MULTI_BODY_OMP_H_
+#define __MULTI_BODY_OMP_H_
+
+#include "reaxc_types.h"
+
+void Atom_EnergyOMP( reax_system*, control_params*, simulation_data*,
+                  storage*, reax_list**, output_controls* );
+
+#endif
diff --git a/src/USER-OMP/reaxc_nonbonded_omp.cpp b/src/USER-OMP/reaxc_nonbonded_omp.cpp
new file mode 100644
index 0000000000..73dbc1dda4
--- /dev/null
+++ b/src/USER-OMP/reaxc_nonbonded_omp.cpp
@@ -0,0 +1,383 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include  <omp.h>
+#include "thr_data.h"
+
+#include "reaxc_types.h"
+
+#include "reaxc_nonbonded.h"
+#include "reaxc_nonbonded_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control,
+			     simulation_data *data, storage *workspace,
+			     reax_list **lists, output_controls *out_control ) {
+
+  int natoms = system->n; 
+  int nthreads = control->nthreads;
+  long totalReductionSize = system->N * nthreads;
+  reax_list *far_nbrs = (*lists) + FAR_NBRS;
+  double p_vdW1 = system->reax_param.gp.l[28];
+  double p_vdW1i = 1.0 / p_vdW1;
+  double total_EvdW = 0.;
+  double total_Eele = 0.;
+  
+#pragma omp parallel default(shared) reduction(+: total_EvdW, total_Eele)  //default(none)
+  {
+  int tid = omp_get_thread_num();
+  int i, j, pj;
+  int start_i, end_i, orig_i, orig_j, flag;
+  double powr_vdW1, powgi_vdW1;
+  double tmp, r_ij, fn13, exp1, exp2;
+  double Tap, dTap, dfn13, CEvd, CEclmb, de_core;
+  double dr3gamij_1, dr3gamij_3;
+  double e_ele, e_ele_thr, e_vdW, e_vdW_thr, e_core, SMALL = 0.0001;
+  double e_lg, de_lg, r_ij5, r_ij6, re6;
+  rvec temp, ext_press;
+  two_body_parameters *twbp;
+  far_neighbor_data *nbr_pj;
+  
+  // Tallying variables:
+  double pe_vdw, f_tmp, delij[3];
+  
+  long reductionOffset = (system->N * tid);
+  
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+  
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, 
+				    system->pair_ptr->vflag_either, 
+				    natoms, system->pair_ptr->eatom,
+				    system->pair_ptr->vatom, thr);  
+  e_core = 0;
+  e_vdW = 0;
+  e_vdW_thr = 0;
+  e_lg = 0;
+  de_lg = 0.0;
+
+//#pragma omp for schedule(dynamic,50)
+#pragma omp for schedule(guided)
+  for( i = 0; i < natoms; ++i ) {
+    if(system->my_atoms[i].type < 0) continue;
+    start_i = Start_Index(i, far_nbrs);
+    end_i   = End_Index(i, far_nbrs);
+    orig_i  = system->my_atoms[i].orig_id;
+     
+    for( pj = start_i; pj < end_i; ++pj ) {
+      nbr_pj = &(far_nbrs->select.far_nbr_list[pj]);
+      j = nbr_pj->nbr;
+      orig_j  = system->my_atoms[j].orig_id;
+      
+      flag = 0;
+      if(nbr_pj->d <= control->nonb_cut) {
+	if(j < natoms) flag = 1;
+	else if (orig_i < orig_j) flag = 1;
+	else if (orig_i == orig_j) {
+	  if (nbr_pj->dvec[2] > SMALL) flag = 1;
+	  else if (fabs(nbr_pj->dvec[2]) < SMALL) {
+	    if (nbr_pj->dvec[1] > SMALL) flag = 1;
+	    else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL)
+	      flag = 1;
+	  }
+	}
+      }
+      
+      if (flag) {
+	
+	r_ij = nbr_pj->d;
+	twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ]
+		 [ system->my_atoms[j].type ]);
+	
+	/* Calculate Taper and its derivative */
+	// Tap = nbr_pj->Tap;   -- precomputed during compte_H
+	Tap = workspace->Tap[7] * r_ij + workspace->Tap[6];
+	Tap = Tap * r_ij + workspace->Tap[5];
+	Tap = Tap * r_ij + workspace->Tap[4];
+	Tap = Tap * r_ij + workspace->Tap[3];
+	Tap = Tap * r_ij + workspace->Tap[2];
+	Tap = Tap * r_ij + workspace->Tap[1];
+	Tap = Tap * r_ij + workspace->Tap[0];
+	
+	dTap = 7*workspace->Tap[7] * r_ij + 6*workspace->Tap[6];
+	dTap = dTap * r_ij + 5*workspace->Tap[5];
+	dTap = dTap * r_ij + 4*workspace->Tap[4];
+	dTap = dTap * r_ij + 3*workspace->Tap[3];
+	dTap = dTap * r_ij + 2*workspace->Tap[2];
+	dTap += workspace->Tap[1]/r_ij;
+	
+	/*vdWaals Calculations*/
+	if(system->reax_param.gp.vdw_type==1 || system->reax_param.gp.vdw_type==3)
+	  { // shielding
+	    powr_vdW1 = pow(r_ij, p_vdW1);
+	    powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1);
+	    
+	    fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i );
+	    exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
+	    exp2 = exp( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
+	    
+	    e_vdW = twbp->D * (exp1 - 2.0 * exp2);
+	    total_EvdW += Tap * e_vdW;
+	    
+	    dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) *
+	      pow(r_ij, p_vdW1 - 2.0);
+
+	    CEvd = dTap * e_vdW -
+	      Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13;
+	  }
+	else{ // no shielding
+	  exp1 = exp( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
+	  exp2 = exp( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
+	  
+	  e_vdW = twbp->D * (exp1 - 2.0 * exp2);
+	  total_EvdW += Tap * e_vdW;
+	  
+	  CEvd = dTap * e_vdW -
+	      Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) / r_ij;
+	}
+	
+	if(system->reax_param.gp.vdw_type==2 || system->reax_param.gp.vdw_type==3)
+	  { // innner wall
+	    e_core = twbp->ecore * exp(twbp->acore * (1.0-(r_ij/twbp->rcore)));
+	    total_EvdW += Tap * e_core;
+	    
+	    de_core = -(twbp->acore/twbp->rcore) * e_core;
+	    CEvd += dTap * e_core + Tap * de_core / r_ij;
+	    
+	    //  lg correction, only if lgvdw is yes
+	    if (control->lgflag) {
+	      r_ij5 = pow( r_ij, 5.0 );
+	      r_ij6 = pow( r_ij, 6.0 );
+	      re6 = pow( twbp->lgre, 6.0 );
+	      
+	      e_lg = -(twbp->lgcij/( r_ij6 + re6 ));
+	      total_EvdW += Tap * e_lg;
+	      
+	      de_lg = -6.0 * e_lg *  r_ij5 / ( r_ij6 + re6 ) ;
+	      CEvd += dTap * e_lg + Tap * de_lg / r_ij;
+	    }
+	    
+	  }
+	
+	/*Coulomb Calculations*/
+	dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma );
+	dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 );
+	
+	tmp = Tap / dr3gamij_3;
+	total_Eele += e_ele =
+	  C_ele * system->my_atoms[i].q * system->my_atoms[j].q * tmp;
+	
+	CEclmb = C_ele * system->my_atoms[i].q * system->my_atoms[j].q *
+	  ( dTap -  Tap * r_ij / dr3gamij_1 ) / dr3gamij_3;
+	
+	/* tally into per-atom energy */
+	if( system->pair_ptr->evflag || system->pair_ptr->vflag_atom) {
+	  pe_vdw = Tap * (e_vdW + e_core + e_lg);
+	  rvec_ScaledSum( delij, 1., system->my_atoms[i].x,
+			  -1., system->my_atoms[j].x );
+	  f_tmp = -(CEvd + CEclmb);
+	  pair_reax_ptr->ev_tally_thr_proxy( system->pair_ptr, i, j, natoms, 
+					     1, pe_vdw, e_ele, f_tmp, 
+					     delij[0], delij[1], delij[2], thr);
+	}
+	  
+	if( control->virial == 0 ) {
+	  rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec );
+	  rvec_ScaledAdd( workspace->forceReduction[reductionOffset+j], 
+			  +(CEvd + CEclmb), nbr_pj->dvec );
+	}
+	else { /* NPT, iNPT or sNPT */
+	  /* for pressure coupling, terms not related to bond order
+	     derivatives are added directly into pressure vector/tensor */
+	  
+	  rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec );
+	  rvec_ScaledAdd( workspace->f[reductionOffset+i], -1., temp );
+	  rvec_Add( workspace->forceReduction[reductionOffset+j], temp);
+	  
+	  rvec_iMultiply( ext_press, nbr_pj->rel_box, temp );
+	  
+	  rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+	}
+	}
+      }
+    }
+
+  pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, 
+				  system->pair_ptr->vflag_either, thr);    
+  } // parallel region
+  
+  data->my_en.e_vdW = total_EvdW;
+  data->my_en.e_ele = total_Eele;
+  
+  Compute_Polarization_Energy( system, data );
+} 
+
+/* ---------------------------------------------------------------------- */
+  
+void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *control,
+				      simulation_data *data, storage *workspace,
+				      reax_list **lists,
+				      output_controls *out_control ) {
+
+  double SMALL = 0.0001;
+  int  natoms = system->n;
+  reax_list *far_nbrs = (*lists) + FAR_NBRS;
+  int  nthreads = control->nthreads;
+  long totalReductionSize = system->N * nthreads;
+  double total_EvdW = 0.;
+  double total_Eele = 0.;
+  
+#pragma omp parallel default(shared) reduction(+:total_EvdW, total_Eele)
+  {
+  int i, j, pj, r;
+  int type_i, type_j, tmin, tmax;
+  int start_i, end_i, orig_i, orig_j, flag;
+  double r_ij, base, dif;
+  double e_vdW, e_ele;
+  double CEvd, CEclmb;
+  double f_tmp, delij[3];
+  rvec temp, ext_press;
+  far_neighbor_data *nbr_pj;
+  LR_lookup_table *t;
+  int  tid = omp_get_thread_num();
+  long froffset = (system->N * tid);
+
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+   
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, 
+				    system->pair_ptr->vflag_either, 
+				    natoms, system->pair_ptr->eatom,
+				    system->pair_ptr->vatom, thr);
+  
+//#pragma omp for schedule(dynamic,50)
+#pragma omp for schedule(guided)
+  for (i = 0; i < natoms; ++i) {
+    type_i  = system->my_atoms[i].type;
+    if(type_i < 0) continue;
+    start_i = Start_Index(i,far_nbrs);
+    end_i   = End_Index(i,far_nbrs);
+    orig_i  = system->my_atoms[i].orig_id;
+    
+    for (pj = start_i; pj < end_i; ++pj) {
+      nbr_pj = &(far_nbrs->select.far_nbr_list[pj]);
+      j = nbr_pj->nbr;
+      type_j = system->my_atoms[j].type;
+      if(type_j < 0) continue;
+      orig_j  = system->my_atoms[j].orig_id;
+
+      flag = 0;
+      if(nbr_pj->d <= control->nonb_cut) {
+	if(j < natoms) flag = 1;
+	else if (orig_i < orig_j) flag = 1;
+	else if (orig_i == orig_j) {
+	  if (nbr_pj->dvec[2] > SMALL) flag = 1;
+	  else if (fabs(nbr_pj->dvec[2]) < SMALL) {
+	    if (nbr_pj->dvec[1] > SMALL) flag = 1;
+	    else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL)
+	      flag = 1;
+	  }
+	}
+      
+      }
+
+      if (flag) {
+	
+	r_ij   = nbr_pj->d;
+	tmin  = MIN( type_i, type_j );
+	tmax  = MAX( type_i, type_j );
+	t = &( LR[tmin][tmax] );
+	
+	/* Cubic Spline Interpolation */
+	r = (int)(r_ij * t->inv_dx);
+	if( r == 0 )  ++r;
+	base = (double)(r+1) * t->dx;
+	dif = r_ij - base;
+	
+	e_vdW = ((t->vdW[r].d*dif + t->vdW[r].c)*dif + t->vdW[r].b)*dif +
+	  t->vdW[r].a;
+	
+	e_ele = ((t->ele[r].d*dif + t->ele[r].c)*dif + t->ele[r].b)*dif +
+	  t->ele[r].a;
+	e_ele *= system->my_atoms[i].q * system->my_atoms[j].q;
+	
+	total_EvdW += e_vdW;
+	total_Eele += e_ele;
+	
+	CEvd = ((t->CEvd[r].d*dif + t->CEvd[r].c)*dif + t->CEvd[r].b)*dif +
+	  t->CEvd[r].a;
+	
+	CEclmb = ((t->CEclmb[r].d*dif+t->CEclmb[r].c)*dif+t->CEclmb[r].b)*dif +
+	  t->CEclmb[r].a;
+	CEclmb *= system->my_atoms[i].q * system->my_atoms[j].q;
+	
+	/* tally into per-atom energy */
+	if( system->pair_ptr->evflag || system->pair_ptr->vflag_atom) {
+	  rvec_ScaledSum( delij, 1., system->my_atoms[i].x,
+			  -1., system->my_atoms[j].x );
+	  f_tmp = -(CEvd + CEclmb);
+	  pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, e_vdW, e_ele,
+					    f_tmp, delij[0], delij[1], delij[2], thr);
+	}
+	
+	if( control->virial == 0 ) {
+	  rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec );
+	  rvec_ScaledAdd( workspace->forceReduction[froffset+j], 
+			  +(CEvd + CEclmb), nbr_pj->dvec );
+	}
+	else { // NPT, iNPT or sNPT
+	  /* for pressure coupling, terms not related to bond order derivatives
+	     are added directly into pressure vector/tensor */
+	  rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec );
+	  
+	  rvec_ScaledAdd( workspace->f[i], -1., temp );
+	  rvec_Add( workspace->forceReduction[froffset+j], temp );
+	  
+	  rvec_iMultiply( ext_press, nbr_pj->rel_box, temp );
+	  rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+	}
+      }
+    }
+  }
+  
+  pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either,
+				  system->pair_ptr->vflag_either, thr);  
+  } // end omp parallel
+  
+  data->my_en.e_vdW = total_EvdW;
+  data->my_en.e_ele = total_Eele;
+  
+  Compute_Polarization_Energy( system, data );
+}
diff --git a/src/USER-OMP/reaxc_nonbonded_omp.h b/src/USER-OMP/reaxc_nonbonded_omp.h
new file mode 100644
index 0000000000..1ea51257cf
--- /dev/null
+++ b/src/USER-OMP/reaxc_nonbonded_omp.h
@@ -0,0 +1,38 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __NONBONDED_OMP_H_
+#define __NONBONDED_OMP_H_
+
+#include "reaxc_types.h"
+
+void vdW_Coulomb_Energy_OMP( reax_system*, control_params*, simulation_data*,
+			    storage*, reax_list**, output_controls* );
+
+void Tabulated_vdW_Coulomb_Energy_OMP( reax_system*, control_params*,
+				       simulation_data*, storage*,
+				       reax_list**, output_controls* );
+#endif
diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.cpp b/src/USER-OMP/reaxc_torsion_angles_omp.cpp
new file mode 100644
index 0000000000..294eeb9544
--- /dev/null
+++ b/src/USER-OMP/reaxc_torsion_angles_omp.cpp
@@ -0,0 +1,467 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include <omp.h>
+#include "thr_data.h"
+
+#include "reaxc_types.h"
+#include "reaxc_torsion_angles_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_tool_box.h"
+#include "reaxc_vector.h"
+
+#define MIN_SINE 1e-10
+
+using namespace LAMMPS_NS;
+
+// Functions defined in reaxc_torsion_angles.cpp
+extern double Calculate_Omega(rvec, double, rvec, double, rvec, double, rvec, double,
+			    three_body_interaction_data*, three_body_interaction_data*,
+			    rvec, rvec, rvec, rvec, output_controls*);
+
+/* ---------------------------------------------------------------------- */
+
+void Torsion_AnglesOMP( reax_system *system, control_params *control,
+                     simulation_data *data, storage *workspace,
+                     reax_list **lists, output_controls *out_control )
+{
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  int natoms = system->n;
+  reax_list *bonds = (*lists) + BONDS;
+  reax_list *thb_intrs = (*lists) + THREE_BODIES;
+  double p_tor2 = system->reax_param.gp.l[23];
+  double p_tor3 = system->reax_param.gp.l[24];
+  double p_tor4 = system->reax_param.gp.l[25];
+  double p_cot2 = system->reax_param.gp.l[27];
+  double total_Etor = 0;
+  double total_Econ = 0;
+  int  nthreads = control->nthreads;
+  
+#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
+ {
+  int i, j, k, l, pi, pj, pk, pl, pij, plk;
+  int type_i, type_j, type_k, type_l;
+  int start_j, end_j, start_k, end_k;
+  int start_pj, end_pj, start_pk, end_pk;
+  int num_frb_intrs = 0;
+  
+  double Delta_j, Delta_k;
+  double r_ij, r_jk, r_kl, r_li;
+  double BOA_ij, BOA_jk, BOA_kl;
+  
+  double exp_tor2_ij, exp_tor2_jk, exp_tor2_kl;
+  double exp_tor1, exp_tor3_DjDk, exp_tor4_DjDk, exp_tor34_inv;
+  double exp_cot2_jk, exp_cot2_ij, exp_cot2_kl;
+  double fn10, f11_DjDk, dfn11, fn12;
+  double theta_ijk, theta_jkl;
+  double sin_ijk, sin_jkl;
+  double cos_ijk, cos_jkl;
+  double tan_ijk_i, tan_jkl_i;
+  double omega, cos_omega, cos2omega, cos3omega;
+  rvec dcos_omega_di, dcos_omega_dj, dcos_omega_dk, dcos_omega_dl;
+  double CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4;
+  double CEtors5, CEtors6, CEtors7, CEtors8, CEtors9;
+  double Cconj, CEconj1, CEconj2, CEconj3;
+  double CEconj4, CEconj5, CEconj6;
+  double e_tor, e_con;
+  rvec dvec_li;
+  rvec force, ext_press;
+  ivec rel_box_jl;
+  // rtensor total_rtensor, temp_rtensor;
+  four_body_header *fbh;
+  four_body_parameters *fbp;
+  bond_data *pbond_ij, *pbond_jk, *pbond_kl;
+  bond_order_data *bo_ij, *bo_jk, *bo_kl;
+  three_body_interaction_data *p_ijk, *p_jkl;
+  
+  // Virial tallying variables
+  double delil[3], deljl[3], delkl[3];
+  double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3];
+
+  int  tid = omp_get_thread_num();
+  long reductionOffset = (system->N * tid);
+  int num_thb_intrs = 0;
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either,
+                                    system->pair_ptr->vflag_either,
+                                    system->N, system->pair_ptr->eatom,
+                                    system->pair_ptr->vatom, thr);
+
+#pragma omp for schedule(static)
+  for (j = 0; j < system->N; ++j) {
+    start_j = Start_Index(j, bonds);
+    end_j = End_Index(j, bonds);
+    
+    for (pk = start_j; pk < end_j; ++pk) {
+      bo_jk = &( bonds->select.bond_list[pk].bo_data );
+      for (k = 0; k < nthreads; ++k)
+	bo_jk->CdboReduction[k] = 0.;
+    }
+  }
+  
+#pragma omp for schedule(dynamic,50)
+  for (j = 0; j < natoms; ++j) {
+    type_j = system->my_atoms[j].type;
+    Delta_j = workspace->Delta_boc[j];
+    start_j = Start_Index(j, bonds);
+    end_j = End_Index(j, bonds);
+    
+    for (pk = start_j; pk < end_j; ++pk) {
+      pbond_jk = &( bonds->select.bond_list[pk] );
+      k = pbond_jk->nbr;
+      bo_jk = &( pbond_jk->bo_data );
+      BOA_jk = bo_jk->BO - control->thb_cut;
+      
+      /* see if there are any 3-body interactions involving j&k
+         where j is the central atom. Otherwise there is no point in
+         trying to form a 4-body interaction out of this neighborhood */
+      if (system->my_atoms[j].orig_id < system->my_atoms[k].orig_id &&
+          bo_jk->BO > control->thb_cut/*0*/ && Num_Entries(pk, thb_intrs)) {
+        start_k = Start_Index(k, bonds);
+        end_k = End_Index(k, bonds);
+        pj = pbond_jk->sym_index; // pj points to j on k's list
+	
+        /* do the same check as above:
+           are there any 3-body interactions involving k&j
+           where k is the central atom */
+        if (Num_Entries(pj, thb_intrs)) {
+          type_k = system->my_atoms[k].type;
+          Delta_k = workspace->Delta_boc[k];
+          r_jk = pbond_jk->d;
+	  
+          start_pk = Start_Index(pk, thb_intrs );
+          end_pk = End_Index(pk, thb_intrs );
+          start_pj = Start_Index(pj, thb_intrs );
+          end_pj = End_Index(pj, thb_intrs );
+	  
+          exp_tor2_jk = exp( -p_tor2 * BOA_jk );
+          exp_cot2_jk = exp( -p_cot2 * SQR(BOA_jk - 1.5) );
+          exp_tor3_DjDk = exp( -p_tor3 * (Delta_j + Delta_k) );
+          exp_tor4_DjDk = exp( p_tor4  * (Delta_j + Delta_k) );
+          exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DjDk + exp_tor4_DjDk);
+          f11_DjDk = (2.0 + exp_tor3_DjDk) * exp_tor34_inv;
+	  
+	  
+          /* pick i up from j-k interaction where j is the central atom */
+          for (pi = start_pk; pi < end_pk; ++pi) {
+            p_ijk = &( thb_intrs->select.three_body_list[pi] );
+	    pij = p_ijk->pthb; // pij is pointer to i on j's bond_list
+            pbond_ij = &( bonds->select.bond_list[pij] );
+            bo_ij = &( pbond_ij->bo_data );
+	    
+	    if (bo_ij->BO > control->thb_cut/*0*/) {
+              i = p_ijk->thb;
+              type_i = system->my_atoms[i].type;
+              r_ij = pbond_ij->d;
+              BOA_ij = bo_ij->BO - control->thb_cut;
+
+              theta_ijk = p_ijk->theta;
+              sin_ijk = sin( theta_ijk );
+              cos_ijk = cos( theta_ijk );
+              //tan_ijk_i = 1. / tan( theta_ijk );
+              if( sin_ijk >= 0 && sin_ijk <= MIN_SINE )
+                tan_ijk_i = cos_ijk / MIN_SINE;
+              else if( sin_ijk <= 0 && sin_ijk >= -MIN_SINE )
+                tan_ijk_i = cos_ijk / -MIN_SINE;
+              else tan_ijk_i = cos_ijk / sin_ijk;
+
+              exp_tor2_ij = exp( -p_tor2 * BOA_ij );
+              exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij -1.5) );
+
+
+              /* pick l up from j-k interaction where k is the central atom */
+              for (pl = start_pj; pl < end_pj; ++pl) {
+                p_jkl = &( thb_intrs->select.three_body_list[pl] );
+                l = p_jkl->thb;
+                plk = p_jkl->pthb; //pointer to l on k's bond_list!
+                pbond_kl = &( bonds->select.bond_list[plk] );
+                bo_kl = &( pbond_kl->bo_data );
+                type_l = system->my_atoms[l].type;
+                fbh = &(system->reax_param.fbp[type_i][type_j]
+                        [type_k][type_l]);
+                fbp = &(system->reax_param.fbp[type_i][type_j]
+                        [type_k][type_l].prm[0]);
+
+		if (i != l && fbh->cnt &&
+                    bo_kl->BO > control->thb_cut/*0*/ &&
+                    bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/) {
+                  ++num_frb_intrs;
+                  //fprintf(stderr,
+		  //      "%5d: %6d %6d %6d %6d\n", num_frb_intrs, 
+		  //      system->my_atoms[i].orig_id,system->my_atoms[j].orig_id,
+		  //      system->my_atoms[k].orig_id,system->my_atoms[l].orig_id);
+
+                  r_kl = pbond_kl->d;
+                  BOA_kl = bo_kl->BO - control->thb_cut;
+		  
+                  theta_jkl = p_jkl->theta;
+                  sin_jkl = sin( theta_jkl );
+                  cos_jkl = cos( theta_jkl );
+                  //tan_jkl_i = 1. / tan( theta_jkl );
+                  if( sin_jkl >= 0 && sin_jkl <= MIN_SINE )
+                    tan_jkl_i = cos_jkl / MIN_SINE;
+                  else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE )
+                    tan_jkl_i = cos_jkl / -MIN_SINE;
+                  else tan_jkl_i = cos_jkl /sin_jkl;
+		  
+                  rvec_ScaledSum( dvec_li, 1., system->my_atoms[i].x,
+                                  -1., system->my_atoms[l].x );
+                  r_li = rvec_Norm( dvec_li );
+		  
+		  
+                  /* omega and its derivative */
+                  omega = Calculate_Omega( pbond_ij->dvec, r_ij,
+                                           pbond_jk->dvec, r_jk,
+                                           pbond_kl->dvec, r_kl,
+                                           dvec_li, r_li,
+                                           p_ijk, p_jkl,
+                                           dcos_omega_di, dcos_omega_dj,
+                                           dcos_omega_dk, dcos_omega_dl,
+                                           out_control );
+
+                  cos_omega = cos( omega );
+                  cos2omega = cos( 2. * omega );
+                  cos3omega = cos( 3. * omega );
+                  /* end omega calculations */
+		  
+                  /* torsion energy */
+                  exp_tor1 = exp( fbp->p_tor1 *
+                                  SQR(2.0 - bo_jk->BO_pi - f11_DjDk) );
+                  exp_tor2_kl = exp( -p_tor2 * BOA_kl );
+                  exp_cot2_kl = exp( -p_cot2 * SQR(BOA_kl - 1.5) );
+                  fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) *
+                    (1.0 - exp_tor2_kl);
+
+                  CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) +
+                               fbp->V2 * exp_tor1 * (1.0 - cos2omega) +
+                               fbp->V3 * (1.0 + cos3omega) );
+
+		  total_Etor += e_tor = fn10 * sin_ijk * sin_jkl * CV;
+
+                  dfn11 = (-p_tor3 * exp_tor3_DjDk +
+                           (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) *
+                           (2.0 + exp_tor3_DjDk) * exp_tor34_inv) *
+                    exp_tor34_inv;
+
+                  CEtors1 = sin_ijk * sin_jkl * CV;
+
+                  CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 *
+                    (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) *
+                    sin_ijk * sin_jkl;
+                  CEtors3 = CEtors2 * dfn11;
+
+                  CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij *
+                    (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl);
+                  CEtors5 = CEtors1 * p_tor2 *
+                    (1.0 - exp_tor2_ij) * exp_tor2_jk * (1.0 - exp_tor2_kl);
+                  CEtors6 = CEtors1 * p_tor2 *
+                    (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * exp_tor2_kl;
+
+                  cmn = -fn10 * CV;
+                  CEtors7 = cmn * sin_jkl * tan_ijk_i;
+                  CEtors8 = cmn * sin_ijk * tan_jkl_i;
+
+                  CEtors9 = fn10 * sin_ijk * sin_jkl *
+                    (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega +
+                     1.5 * fbp->V3 * (cos2omega + 2.0 * SQR(cos_omega)));
+                  /* end  of torsion energy */
+
+
+                  /* 4-body conjugation energy */
+                  fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl;
+                  //data->my_en.e_con += e_con =
+		  total_Econ += e_con =
+                    fbp->p_cot1 * fn12 *
+                    (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl);
+
+                  Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 *
+                    (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl);
+
+                  CEconj1 = Cconj * (BOA_ij - 1.5e0);
+                  CEconj2 = Cconj * (BOA_jk - 1.5e0);
+                  CEconj3 = Cconj * (BOA_kl - 1.5e0);
+
+                  CEconj4 = -fbp->p_cot1 * fn12 *
+                    (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i;
+                  CEconj5 = -fbp->p_cot1 * fn12 *
+                    (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i;
+                  CEconj6 = 2.0 * fbp->p_cot1 * fn12 *
+                    cos_omega * sin_ijk * sin_jkl;
+                  /* end 4-body conjugation energy */
+
+                  /* FORCES */
+                  bo_jk->Cdbopi += CEtors2;
+                  workspace->CdDelta[j] += CEtors3;
+		  //workspace->CdDelta[k] += CEtors3;
+                  workspace->CdDeltaReduction[reductionOffset+k] += CEtors3;
+                  bo_ij->Cdbo += (CEtors4 + CEconj1);
+                  bo_jk->Cdbo += (CEtors5 + CEconj2);
+                  //bo_kl->Cdbo += (CEtors6 + CEconj3);
+		  bo_kl->CdboReduction[tid] += (CEtors6 + CEconj3);
+
+                  if( control->virial == 0 ) {
+                    /* dcos_theta_ijk */
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors7 + CEconj4, p_ijk->dcos_dj );
+		    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i],
+                                    CEtors7 + CEconj4, p_ijk->dcos_dk );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k],
+                                    CEtors7 + CEconj4, p_ijk->dcos_di );
+
+                    /* dcos_theta_jkl */
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors8 + CEconj5, p_jkl->dcos_di );
+		    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k],
+                                    CEtors8 + CEconj5, p_jkl->dcos_dj );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l],
+                                    CEtors8 + CEconj5, p_jkl->dcos_dk );
+
+                    /* dcos_omega */
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors9 + CEconj6, dcos_omega_dj );
+		    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i],
+                                    CEtors9 + CEconj6, dcos_omega_di );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k],
+                                    CEtors9 + CEconj6, dcos_omega_dk );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l],
+                                    CEtors9 + CEconj6, dcos_omega_dl );
+                  }
+                  else {
+                    ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box);
+		    
+                    /* dcos_theta_ijk */
+                    rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_dk );
+		    rvec_Add( workspace->forceReduction[reductionOffset+i], force );
+                    rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+		    
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors7 + CEconj4, p_ijk->dcos_dj );
+		    
+                    rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di );
+		    rvec_Add( workspace->forceReduction[reductionOffset+k], force );
+                    rvec_iMultiply( ext_press, pbond_jk->rel_box, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+		    
+                    /* dcos_theta_jkl */
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors8 + CEconj5, p_jkl->dcos_di );
+		    
+                    rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj );
+		    rvec_Add( workspace->forceReduction[reductionOffset+k], force );
+                    rvec_iMultiply( ext_press, pbond_jk->rel_box, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+		    
+                    rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dk );
+		    rvec_Add( workspace->forceReduction[reductionOffset+l], force );
+                    rvec_iMultiply( ext_press, rel_box_jl, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );    
+		    
+                    /* dcos_omega */
+                    rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di );
+		    rvec_Add( workspace->forceReduction[reductionOffset+i], force );
+                    rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+
+                    rvec_ScaledAdd( workspace->f[j],
+                                    CEtors9 + CEconj6, dcos_omega_dj );
+
+                    rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dk );
+		    rvec_Add( workspace->forceReduction[reductionOffset+k], force );
+                    rvec_iMultiply( ext_press, pbond_jk->rel_box, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+
+                    rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dl );
+		    rvec_Add( workspace->forceReduction[reductionOffset+i], force );
+                    rvec_iMultiply( ext_press, rel_box_jl, force );
+		    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+                  }
+
+                  /* tally into per-atom virials */
+                  if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) {
+		    
+                    // acquire vectors
+                    rvec_ScaledSum( delil, 1., system->my_atoms[l].x,
+                                          -1., system->my_atoms[i].x );
+                    rvec_ScaledSum( deljl, 1., system->my_atoms[l].x,
+                                          -1., system->my_atoms[j].x );
+                    rvec_ScaledSum( delkl, 1., system->my_atoms[l].x,
+                                          -1., system->my_atoms[k].x );
+                    // dcos_theta_ijk
+                    rvec_Scale( fi_tmp, CEtors7 + CEconj4, p_ijk->dcos_dk );
+                    rvec_Scale( fj_tmp, CEtors7 + CEconj4, p_ijk->dcos_dj );
+                    rvec_Scale( fk_tmp, CEtors7 + CEconj4, p_ijk->dcos_di );
+		    
+                    // dcos_theta_jkl
+                    rvec_ScaledAdd( fj_tmp, CEtors8 + CEconj5, p_jkl->dcos_di );
+                    rvec_ScaledAdd( fk_tmp, CEtors8 + CEconj5, p_jkl->dcos_dj );
+		    
+                    // dcos_omega
+                    rvec_ScaledAdd( fi_tmp, CEtors9 + CEconj6, dcos_omega_di );
+                    rvec_ScaledAdd( fj_tmp, CEtors9 + CEconj6, dcos_omega_dj );
+                    rvec_ScaledAdd( fk_tmp, CEtors9 + CEconj6, dcos_omega_dk );
+		    
+                    // tally
+                    eng_tmp = e_tor + e_con;
+		    
+                    if (system->pair_ptr->evflag)
+		      pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, k, system->n, 1, 
+						        eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+
+		    // NEED TO MAKE AN OMP VERSION OF THIS CALL!
+                    if (system->pair_ptr->vflag_atom)
+		      system->pair_ptr->v_tally4(i, j, k, l, fi_tmp, fj_tmp, fk_tmp,
+						 delil, deljl, delkl );
+                  }
+		  
+                } // pl check ends
+              } // pl loop ends
+            } // pi check ends
+          } // pi loop ends
+        } // k-j neighbor check ends
+      } // j<k && j-k neighbor check ends
+    } // pk loop ends
+  } // j loop
+  
+ } // end omp parallel
+ 
+  data->my_en.e_tor = total_Etor;
+  data->my_en.e_con = total_Econ;
+  
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTETORSIONANGLESBOINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.h b/src/USER-OMP/reaxc_torsion_angles_omp.h
new file mode 100644
index 0000000000..51b05f7ae1
--- /dev/null
+++ b/src/USER-OMP/reaxc_torsion_angles_omp.h
@@ -0,0 +1,36 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __TORSION_ANGLES_OMP_H_
+#define __TORSION_ANGLES_OMP_H_
+
+#include "reaxc_types.h"
+#include "reaxc_torsion_angles.h"
+
+void Torsion_AnglesOMP( reax_system*, control_params*, simulation_data*,
+			storage*, reax_list**, output_controls* );
+
+#endif
diff --git a/src/USER-OMP/reaxc_valence_angles_omp.cpp b/src/USER-OMP/reaxc_valence_angles_omp.cpp
new file mode 100644
index 0000000000..7c45db1493
--- /dev/null
+++ b/src/USER-OMP/reaxc_valence_angles_omp.cpp
@@ -0,0 +1,613 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include <omp.h>
+#include "thr_data.h"
+
+#include "reaxc_types.h"
+#include "reaxc_valence_angles.h"
+#include "reaxc_valence_angles_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_vector.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void Calculate_dCos_ThetaOMP( rvec dvec_ji, double d_ji, rvec dvec_jk, double d_jk,
+			      rvec* dcos_theta_di,
+			      rvec* dcos_theta_dj,
+			      rvec* dcos_theta_dk )
+{
+  int t;
+  double sqr_d_ji = SQR(d_ji);
+  double sqr_d_jk = SQR(d_jk);
+  double inv_dists = 1.0 / (d_ji * d_jk);
+  double inv_dists3 = inv_dists * inv_dists * inv_dists;
+  double dot_dvecs = dvec_ji[0]*dvec_jk[0] + dvec_ji[1]*dvec_jk[1] + dvec_ji[2]*dvec_jk[2];
+  double Cdot_inv3 = dot_dvecs * inv_dists3;
+
+  double csqr_jk = Cdot_inv3 * sqr_d_jk;
+  double csqr_ji = Cdot_inv3 * sqr_d_ji;
+  
+  // Try to help compiler out by unrolling
+  // x-component
+  double dinv_jk = dvec_jk[0] * inv_dists;
+  double dinv_ji = dvec_ji[0] * inv_dists;
+
+  double cdev_ji = csqr_jk * dvec_ji[0];
+  double cdev_jk = csqr_ji * dvec_jk[0];
+
+  (*dcos_theta_di)[0] =   dinv_jk            - cdev_ji;
+  (*dcos_theta_dj)[0] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[0] =             dinv_ji            - cdev_jk;
+
+  // y-component
+  dinv_jk = dvec_jk[1] * inv_dists;
+  dinv_ji = dvec_ji[1] * inv_dists;
+
+  cdev_ji = csqr_jk * dvec_ji[1];
+  cdev_jk = csqr_ji * dvec_jk[1];
+
+  (*dcos_theta_di)[1] =   dinv_jk            - cdev_ji;
+  (*dcos_theta_dj)[1] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[1] =             dinv_ji            - cdev_jk;
+
+  // z-component
+  dinv_jk = dvec_jk[2] * inv_dists;
+  dinv_ji = dvec_ji[2] * inv_dists;
+
+  cdev_ji = csqr_jk * dvec_ji[2];
+  cdev_jk = csqr_ji * dvec_jk[2];
+
+  (*dcos_theta_di)[2] =   dinv_jk            - cdev_ji;
+  (*dcos_theta_dj)[2] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[2] =             dinv_ji            - cdev_jk;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* this is a 3-body interaction in which the main role is
+   played by j which sits in the middle of the other two. */
+void Valence_AnglesOMP( reax_system *system, control_params *control,
+			simulation_data *data, storage *workspace,
+			reax_list **lists, output_controls *out_control )
+{
+
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+  
+  reax_list *bonds = (*lists) + BONDS;
+  reax_list *thb_intrs =  (*lists) + THREE_BODIES;
+
+  // Precompute and store valence_angle offsets for OpenMP code.
+  int * _my_offset = workspace->valence_angle_atom_myoffset;
+
+  /* global parameters used in these calculations */
+  double p_val6 = system->reax_param.gp.l[14];
+  double p_val8 = system->reax_param.gp.l[33];
+  double p_val9 = system->reax_param.gp.l[16];
+  double p_val10 = system->reax_param.gp.l[17];
+  double total_Eang = 0;
+  double total_Epen = 0;
+  double total_Ecoa = 0;
+  
+  int  per_atom = (thb_intrs->num_intrs / system->N);
+  int  nthreads = control->nthreads;
+  int  chunksize = system->N/(nthreads*10);
+  int  num_thb_intrs = 0;
+  int  TWICE = 2;
+
+#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs) 
+ {
+  int i, j, pi, k, pk, t;
+  int type_i, type_j, type_k;
+  int start_j, end_j, start_pk, end_pk;
+  int cnt, my_offset, mark;
+  
+  double temp, temp_bo_jt, pBOjt7;
+  double p_val1, p_val2, p_val3, p_val4, p_val5, p_val7;
+  double p_pen1, p_pen2, p_pen3, p_pen4;
+  double p_coa1, p_coa2, p_coa3, p_coa4;
+  double trm8, expval6, expval7, expval2theta, expval12theta, exp3ij, exp3jk;
+  double exp_pen2ij, exp_pen2jk, exp_pen3, exp_pen4, trm_pen34, exp_coa2;
+  double dSBO1, dSBO2, SBO, SBO2, CSBO2, SBOp, prod_SBO, vlpadj;
+  double CEval1, CEval2, CEval3, CEval4, CEval5, CEval6, CEval7, CEval8;
+  double CEpen1, CEpen2, CEpen3;
+  double e_ang, e_coa, e_pen;
+  double CEcoa1, CEcoa2, CEcoa3, CEcoa4, CEcoa5;
+  double Cf7ij, Cf7jk, Cf8j, Cf9j;
+  double f7_ij, f7_jk, f8_Dj, f9_Dj;
+  double Ctheta_0, theta_0, theta_00, theta, cos_theta, sin_theta;
+  double r_ij, r_jk;
+  double BOA_ij, BOA_jk;
+  rvec force, ext_press;
+  // rtensor temp_rtensor, total_rtensor;
+  
+  // Tallying variables
+  double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3];
+  double delij[3], delkj[3];
+  
+  three_body_header *thbh;
+  three_body_parameters *thbp;
+  three_body_interaction_data *p_ijk, *p_kji;
+  bond_data *pbond_ij, *pbond_jk, *pbond_jt;
+  bond_order_data *bo_ij, *bo_jk, *bo_jt;
+  
+  int  tid = omp_get_thread_num();
+  long reductionOffset = (system->N * tid);
+  class PairReaxCOMP *pair_reax_ptr;
+  pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+  class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+
+  pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either,
+                                    system->pair_ptr->vflag_either,
+                                    system->N, system->pair_ptr->eatom,
+                                    system->pair_ptr->vatom, thr);
+
+
+  // Run through a minimal for(j<N) loop once to precompute offsets with safe number of threads
+  
+  const int per_thread = thb_intrs->num_intrs / nthreads;
+
+#pragma omp for schedule(dynamic,50)
+  for (j = 0; j < system->N; ++j) {
+    type_j = system->my_atoms[j].type;
+    _my_offset[j] = 0;
+    if(type_j < 0) continue;
+
+    start_j = Start_Index(j, bonds);
+    end_j = End_Index(j, bonds);
+
+    // Always point to start of workspace to count angles
+    my_offset = tid * per_thread;
+    
+    for (pi = start_j; pi < end_j; ++pi) {
+      Set_Start_Index( pi, my_offset, thb_intrs );
+      pbond_ij = &(bonds->select.bond_list[pi]);
+      bo_ij = &(pbond_ij->bo_data);
+      BOA_ij = bo_ij->BO - control->thb_cut;
+
+      if (BOA_ij > 0.0) {
+	i = pbond_ij->nbr;	  
+	
+	/* first copy 3-body intrs from previously computed ones where i>k.
+	   in the second for-loop below,
+	   we compute only new 3-body intrs where i < k */
+	for (pk = start_j; pk < pi; ++pk) {
+	  start_pk = Start_Index( pk, thb_intrs );
+	  end_pk = End_Index( pk, thb_intrs );
+	  
+	  for (t = start_pk; t < end_pk; ++t)
+	    if (thb_intrs->select.three_body_list[t].thb == i) {
+
+	      p_ijk = &(thb_intrs->select.three_body_list[my_offset] );
+	      p_ijk->thb = bonds->select.bond_list[pk].nbr;
+	      
+	      ++my_offset;
+	      break;
+	    }
+	} // for(pk)
+
+	/* and this is the second for loop mentioned above */
+	for (pk = pi+1; pk < end_j; ++pk) {
+	  pbond_jk = &(bonds->select.bond_list[pk]);
+	  k        = pbond_jk->nbr;
+
+	  if (j >= system->n && i >= system->n && k >= system->n) continue;
+
+	  p_ijk    = &( thb_intrs->select.three_body_list[my_offset] );
+	  p_ijk->thb = k;
+	  
+	  ++my_offset; // add this  to the list of 3-body interactions
+	} // for(pk)
+      } // if()
+      
+      Set_End_Index(pi, my_offset, thb_intrs );
+    } // for(pi)
+
+    // Confirm that thb_intrs->num_intrs / nthreads is enough to hold all angles from a single atom
+    if(my_offset >= (tid+1)*per_thread) {
+      int me;
+      MPI_Comm_rank(MPI_COMM_WORLD,&me);
+      fprintf( stderr, "step%d-ran out of space on angle_list on proc %i for atom %i:", data->step, me, j);
+      fprintf( stderr, " nthreads= %d, tid=%d, my_offset=%d, per_thread=%d\n", nthreads, tid, my_offset, per_thread);
+      fprintf( stderr, " num_intrs= %i  N= %i\n",thb_intrs->num_intrs , system->N);
+      MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
+    }
+
+    // Number of angles owned by this atom
+    _my_offset[j] = my_offset - tid * per_thread;
+  } // for(j)
+  
+  // Wait for all threads to finish counting angles
+#pragma omp barrier
+
+  // Master thread uses angle counts to compute offsets
+  // This can be threaded
+#pragma omp master
+  {
+    int current_count = 0;
+    int m = _my_offset[0];
+    _my_offset[0] = current_count;
+    for(j=1; j<system->N; j++) {
+      current_count+= m;
+      m = _my_offset[j];
+      _my_offset[j] = current_count;
+    }
+    _my_offset[system->N] = current_count + m; // Used to test if last particle has any angles
+  }
+
+  // All threads wait till master thread finished computing offsets
+#pragma omp barrier
+
+  // Original loop, but now using precomputed offsets
+  // Safe to use all threads available, regardless of threads tasked above
+  // We also now skip over atoms that have no angles assigned
+#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)  
+  for (j = 0; j < system->N; ++j) {         // Ray: the first one with system->N
+    type_j = system->my_atoms[j].type;
+    if(type_j < 0) continue;
+
+    // Skip if no angles for this atom
+    if(_my_offset[j] == _my_offset[j+1]) continue;
+
+    start_j = Start_Index(j, bonds);
+    end_j = End_Index(j, bonds);
+    
+    type_j = system->my_atoms[j].type;
+
+    my_offset = _my_offset[j];
+
+    p_val3 = system->reax_param.sbp[ type_j ].p_val3;
+    p_val5 = system->reax_param.sbp[ type_j ].p_val5;
+    
+    SBOp = 0, prod_SBO = 1;
+    for (t = start_j; t < end_j; ++t) {
+      bo_jt = &(bonds->select.bond_list[t].bo_data);
+      SBOp += (bo_jt->BO_pi + bo_jt->BO_pi2);
+      temp = SQR( bo_jt->BO );
+      temp *= temp;
+      temp *= temp;
+      prod_SBO *= exp( -temp );
+    }
+    
+    // modifications to match Adri's code - 09/01/09
+    if( workspace->vlpex[j] >= 0 ){
+      vlpadj = 0;
+      dSBO2 = prod_SBO - 1;
+    }
+    else{
+      vlpadj = workspace->nlp[j];
+      dSBO2 = (prod_SBO - 1) * (1 - p_val8 * workspace->dDelta_lp[j]);
+    }
+    
+    SBO = SBOp + (1 - prod_SBO) * (-workspace->Delta_boc[j] - p_val8 * vlpadj);
+    dSBO1 = -8 * prod_SBO * ( workspace->Delta_boc[j] + p_val8 * vlpadj );
+    
+    if( SBO <= 0 )
+      SBO2 = 0, CSBO2 = 0;
+    else if( SBO > 0 && SBO <= 1 ) {
+        SBO2 = pow( SBO, p_val9 );
+        CSBO2 = p_val9 * pow( SBO, p_val9 - 1 );
+    }
+    else if( SBO > 1 && SBO < 2 ) {
+      SBO2 = 2 - pow( 2-SBO, p_val9 );
+      CSBO2 = p_val9 * pow( 2 - SBO, p_val9 - 1 );
+    }
+    else
+      SBO2 = 2, CSBO2 = 0;
+    
+    expval6 = exp( p_val6 * workspace->Delta_boc[j] );
+    
+    for (pi = start_j; pi < end_j; ++pi) {
+      Set_Start_Index( pi, my_offset, thb_intrs );
+      pbond_ij = &(bonds->select.bond_list[pi]);
+      bo_ij = &(pbond_ij->bo_data);
+      BOA_ij = bo_ij->BO - control->thb_cut;
+      
+      
+      if (BOA_ij > 0.0) {
+        i = pbond_ij->nbr;
+        r_ij = pbond_ij->d;
+        type_i = system->my_atoms[i].type;
+	
+	
+        /* first copy 3-body intrs from previously computed ones where i>k.
+           in the second for-loop below,
+           we compute only new 3-body intrs where i < k */
+        for (pk = start_j; pk < pi; ++pk) {
+          start_pk = Start_Index( pk, thb_intrs );
+          end_pk = End_Index( pk, thb_intrs );
+	  
+          for (t = start_pk; t < end_pk; ++t)
+            if (thb_intrs->select.three_body_list[t].thb == i) {
+              p_ijk = &(thb_intrs->select.three_body_list[my_offset] );
+              p_kji = &(thb_intrs->select.three_body_list[t]);
+	      
+              p_ijk->thb = bonds->select.bond_list[pk].nbr;
+              p_ijk->pthb  = pk;
+              p_ijk->theta = p_kji->theta;
+              rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk );
+              rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj );
+              rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di );
+	      
+              ++my_offset;
+	      ++num_thb_intrs;
+              break;
+            }
+	} // for(pk)
+	
+	
+        /* and this is the second for loop mentioned above */
+        for (pk = pi+1; pk < end_j; ++pk) {
+          pbond_jk = &(bonds->select.bond_list[pk]);
+          bo_jk    = &(pbond_jk->bo_data);
+          BOA_jk   = bo_jk->BO - control->thb_cut;
+          k        = pbond_jk->nbr;
+          type_k   = system->my_atoms[k].type;
+          p_ijk    = &( thb_intrs->select.three_body_list[my_offset] );
+	  
+	  // Fix by Sudhir
+	  // if (BOA_jk <= 0) continue;
+	  if (j >= system->n && i >= system->n && k >= system->n) continue;
+	  
+          Calculate_Theta( pbond_ij->dvec, pbond_ij->d,
+                           pbond_jk->dvec, pbond_jk->d,
+                           &theta, &cos_theta );
+	  
+          Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d,
+				   pbond_jk->dvec, pbond_jk->d,
+				   &(p_ijk->dcos_di), &(p_ijk->dcos_dj),
+				   &(p_ijk->dcos_dk) );
+          p_ijk->thb = k;
+          p_ijk->pthb = pk;
+          p_ijk->theta = theta;
+	  
+          sin_theta = sin( theta );
+          if( sin_theta < 1.0e-5 )
+            sin_theta = 1.0e-5;
+	  
+          ++my_offset; // add this  to the list of 3-body interactions
+	  ++num_thb_intrs;
+	  
+          if ((j < system->n) && (BOA_jk > 0.0) &&
+              (bo_ij->BO > control->thb_cut) &&
+              (bo_jk->BO > control->thb_cut) &&
+              (bo_ij->BO * bo_jk->BO > control->thb_cutsq)) {
+            r_jk = pbond_jk->d;
+            thbh = &( system->reax_param.thbp[ type_i ][ type_j ][ type_k ] );
+	   	    
+            for (cnt = 0; cnt < thbh->cnt; ++cnt) {
+	      
+              if( fabs(thbh->prm[cnt].p_val1) > 0.001 ) {
+                thbp = &( thbh->prm[cnt] );
+		
+                /* ANGLE ENERGY */
+                p_val1 = thbp->p_val1;
+                p_val2 = thbp->p_val2;
+                p_val4 = thbp->p_val4;
+                p_val7 = thbp->p_val7;
+                theta_00 = thbp->theta_00;
+		
+                exp3ij = exp( -p_val3 * pow( BOA_ij, p_val4 ) );
+                f7_ij = 1.0 - exp3ij;
+                Cf7ij = p_val3 * p_val4 * pow( BOA_ij, p_val4 - 1.0 ) * exp3ij;
+		
+                exp3jk = exp( -p_val3 * pow( BOA_jk, p_val4 ) );
+                f7_jk = 1.0 - exp3jk;
+                Cf7jk = p_val3 * p_val4 * pow( BOA_jk, p_val4 - 1.0 ) * exp3jk;
+		
+                expval7 = exp( -p_val7 * workspace->Delta_boc[j] );
+                trm8 = 1.0 + expval6 + expval7;
+                f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 );
+                Cf8j = ( (1.0 - p_val5) / SQR(trm8) ) *
+                  ( p_val6 * expval6 * trm8 -
+                    (2.0 + expval6) * ( p_val6*expval6 - p_val7*expval7 ) );
+		
+                theta_0 = 180.0 - theta_00 * (1.0 -
+                                              exp(-p_val10 * (2.0 - SBO2)));
+                theta_0 = DEG2RAD( theta_0 );
+		
+                expval2theta  = exp( -p_val2 * SQR(theta_0 - theta) );
+                if (p_val1 >= 0)
+                  expval12theta = p_val1 * (1.0 - expval2theta);
+                else // To avoid linear Me-H-Me angles (6/6/06)
+                  expval12theta = p_val1 * -expval2theta;
+		
+                CEval1 = Cf7ij * f7_jk * f8_Dj * expval12theta;
+                CEval2 = Cf7jk * f7_ij * f8_Dj * expval12theta;
+                CEval3 = Cf8j  * f7_ij * f7_jk * expval12theta;
+                CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj *
+                  expval2theta * (theta_0 - theta);
+		
+                Ctheta_0 = p_val10 * DEG2RAD(theta_00) *
+                  exp( -p_val10 * (2.0 - SBO2) );
+		
+                CEval5 = -CEval4 * Ctheta_0 * CSBO2;
+                CEval6 = CEval5 * dSBO1;
+                CEval7 = CEval5 * dSBO2;
+                CEval8 = -CEval4 / sin_theta;
+
+		total_Eang += e_ang = 
+                  f7_ij * f7_jk * f8_Dj * expval12theta;
+                /* END ANGLE ENERGY*/
+		
+		
+                /* PENALTY ENERGY */
+                p_pen1 = thbp->p_pen1;
+                p_pen2 = system->reax_param.gp.l[19];
+                p_pen3 = system->reax_param.gp.l[20];
+                p_pen4 = system->reax_param.gp.l[21];
+
+                exp_pen2ij = exp( -p_pen2 * SQR( BOA_ij - 2.0 ) );
+                exp_pen2jk = exp( -p_pen2 * SQR( BOA_jk - 2.0 ) );
+                exp_pen3 = exp( -p_pen3 * workspace->Delta[j] );
+                exp_pen4 = exp(  p_pen4 * workspace->Delta[j] );
+                trm_pen34 = 1.0 + exp_pen3 + exp_pen4;
+                f9_Dj = ( 2.0 + exp_pen3 ) / trm_pen34;
+                Cf9j = ( -p_pen3 * exp_pen3 * trm_pen34 -
+                         (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 +
+                                              p_pen4 * exp_pen4 ) ) /
+                  SQR( trm_pen34 );
+
+		total_Epen += e_pen =
+                  p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk;
+		
+                CEpen1 = e_pen * Cf9j / f9_Dj;
+                temp   = -2.0 * p_pen2 * e_pen;
+                CEpen2 = temp * (BOA_ij - 2.0);
+                CEpen3 = temp * (BOA_jk - 2.0);
+                /* END PENALTY ENERGY */
+		
+		
+                /* COALITION ENERGY */
+                p_coa1 = thbp->p_coa1;
+                p_coa2 = system->reax_param.gp.l[2];
+                p_coa3 = system->reax_param.gp.l[38];
+                p_coa4 = system->reax_param.gp.l[30];
+		
+                exp_coa2 = exp( p_coa2 * workspace->Delta_val[j] );
+		total_Ecoa += e_coa =
+                  p_coa1 / (1. + exp_coa2) *
+                  exp( -p_coa3 * SQR(workspace->total_bond_order[i]-BOA_ij) ) *
+                  exp( -p_coa3 * SQR(workspace->total_bond_order[k]-BOA_jk) ) *
+                  exp( -p_coa4 * SQR(BOA_ij - 1.5) ) *
+                  exp( -p_coa4 * SQR(BOA_jk - 1.5) );
+		
+                CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa;
+                CEcoa2 = -2 * p_coa4 * (BOA_jk - 1.5) * e_coa;
+                CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1 + exp_coa2);
+                CEcoa4 = -2 * p_coa3 *
+                  (workspace->total_bond_order[i]-BOA_ij) * e_coa;
+                CEcoa5 = -2 * p_coa3 *
+                  (workspace->total_bond_order[k]-BOA_jk) * e_coa;
+                /* END COALITION ENERGY */
+		
+		
+                /* FORCES */
+                bo_ij->Cdbo += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4));
+                bo_jk->Cdbo += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5));
+                workspace->CdDelta[j] += ((CEval3 + CEval7) + CEpen1 + CEcoa3);
+		workspace->CdDeltaReduction[reductionOffset+i] += CEcoa4;
+		workspace->CdDeltaReduction[reductionOffset+k] += CEcoa5;
+		
+                for (t = start_j; t < end_j; ++t) {
+                    pbond_jt = &( bonds->select.bond_list[t] );
+                    bo_jt = &(pbond_jt->bo_data);
+                    temp_bo_jt = bo_jt->BO;
+                    temp = CUBE( temp_bo_jt );
+                    pBOjt7 = temp * temp * temp_bo_jt;
+
+                    bo_jt->Cdbo += (CEval6 * pBOjt7);
+                    bo_jt->Cdbopi += CEval5;
+                    bo_jt->Cdbopi2 += CEval5;
+                }
+		
+                if( control->virial == 0 ) {
+                  rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj );
+		  rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], 
+				  CEval8, p_ijk->dcos_di );
+                  rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], 
+				  CEval8, p_ijk->dcos_dk );
+                }
+                else {
+                  /* terms not related to bond order derivatives are
+                     added directly into forces and pressure vector/tensor */
+                  rvec_Scale( force, CEval8, p_ijk->dcos_di );
+                  rvec_Add( workspace->forceReduction[reductionOffset+i], force );
+		  
+                  rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
+		  rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+		  
+                  rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj );
+		  
+                  rvec_Scale( force, CEval8, p_ijk->dcos_dk );
+                  rvec_Add( workspace->forceReduction[reductionOffset+k], force );
+                  
+		  rvec_iMultiply( ext_press, pbond_jk->rel_box, force );
+		  rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+                }
+		
+                /* tally into per-atom virials */
+                if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) {
+		  
+                  /* Acquire vectors */
+                  rvec_ScaledSum( delij, 1., system->my_atoms[i].x,
+                                        -1., system->my_atoms[j].x );
+                  rvec_ScaledSum( delkj, 1., system->my_atoms[k].x,
+                                        -1., system->my_atoms[j].x );
+		  
+                  rvec_Scale( fi_tmp, -CEval8, p_ijk->dcos_di );
+                  rvec_Scale( fj_tmp, -CEval8, p_ijk->dcos_dj );
+                  rvec_Scale( fk_tmp, -CEval8, p_ijk->dcos_dk );
+		  
+                  eng_tmp = e_ang + e_pen + e_coa;
+		  
+                  if( system->pair_ptr->evflag)
+		    pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, j, system->N, 1, 
+						      eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+                  if( system->pair_ptr->vflag_atom)
+		    // NEED TO MAKE AN OMP VERSION OF THIS CALL!
+		    system->pair_ptr->v_tally3( i, j, k, fi_tmp, fk_tmp, delij, delkj);
+                }
+
+              } // if(p_val1>0.001)
+            } // for(cnt)
+          } // if(j<n && BOA_jk>0)
+        } // for(pk)
+      } // if(BOA_ij>0)
+
+      Set_End_Index(pi, my_offset, thb_intrs );
+    } // for(pi)
+  } // for(j)
+  
+  pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either,
+				  system->pair_ptr->vflag_either, thr);
+ } // end omp parallel
+
+  data->my_en.e_ang = total_Eang;
+  data->my_en.e_pen = total_Epen;
+  data->my_en.e_coa = total_Ecoa;
+
+  if( num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE ) {
+    workspace->realloc.num_3body = num_thb_intrs * TWICE;
+    if( num_thb_intrs > thb_intrs->num_intrs ) {
+      fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d",
+               data->step, num_thb_intrs, thb_intrs->num_intrs );
+      MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
+    }
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEVALENCEANGLESBOINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
diff --git a/src/USER-OMP/reaxc_valence_angles_omp.h b/src/USER-OMP/reaxc_valence_angles_omp.h
new file mode 100644
index 0000000000..ce304acced
--- /dev/null
+++ b/src/USER-OMP/reaxc_valence_angles_omp.h
@@ -0,0 +1,37 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __VALENCE_ANGLES_OMP_H_
+#define __VALENCE_ANGLES_OMP_H_
+
+#include "reaxc_types.h"
+
+void Valence_AnglesOMP( reax_system*, control_params*, simulation_data*,
+                     storage*, reax_list**, output_controls* );
+
+void Calculate_dCos_ThetaOMP( rvec, double, rvec, double, rvec*, rvec*, rvec* );
+
+#endif
diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp
index 22b5382727..5f171c8768 100644
--- a/src/USER-REAXC/fix_qeq_reax.cpp
+++ b/src/USER-REAXC/fix_qeq_reax.cpp
@@ -64,7 +64,7 @@ static const char cite_fix_qeq_reax[] =
 /* ---------------------------------------------------------------------- */
 
 FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
-  Fix(lmp, narg, arg)
+  Fix(lmp, narg, arg), pertype_option(NULL)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax);
 
@@ -76,7 +76,9 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
   swa = force->numeric(FLERR,arg[4]);
   swb = force->numeric(FLERR,arg[5]);
   tolerance = force->numeric(FLERR,arg[6]);
-  pertype_parameters(arg[7]);
+  int len = strlen(arg[7]) + 1;
+  pertype_option = new char[len];
+  strcpy(pertype_option,arg[7]);
 
   // dual CG support only available for USER-OMP variant
   dual_enabled = 0;
@@ -135,6 +137,8 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
 
 FixQEqReax::~FixQEqReax()
 {
+  delete[] pertype_option;
+
   // unregister callbacks to this fix from Atom class
 
   if (copymode) return;
@@ -158,6 +162,13 @@ FixQEqReax::~FixQEqReax()
 
 /* ---------------------------------------------------------------------- */
 
+void FixQEqReax::post_constructor()
+{
+  pertype_parameters(pertype_option);
+}
+
+/* ---------------------------------------------------------------------- */
+
 int FixQEqReax::setmask()
 {
   int mask = 0;
diff --git a/src/USER-REAXC/fix_qeq_reax.h b/src/USER-REAXC/fix_qeq_reax.h
index d82232576a..19efcd2b03 100644
--- a/src/USER-REAXC/fix_qeq_reax.h
+++ b/src/USER-REAXC/fix_qeq_reax.h
@@ -39,6 +39,7 @@ class FixQEqReax : public Fix {
   FixQEqReax(class LAMMPS *, int, char **);
   ~FixQEqReax();
   int setmask();
+  virtual void post_constructor();
   virtual void init();
   void init_list(int,class NeighList *);
   virtual void init_storage();
@@ -100,6 +101,7 @@ class FixQEqReax : public Fix {
   //double **h;
   //double *hc, *hs;
 
+  char *pertype_option;  // argument to determine how per-type info is obtained
   virtual void pertype_parameters(char*);
   void init_shielding();
   void init_taper();
diff --git a/src/USER-REAXC/pair_reaxc.h b/src/USER-REAXC/pair_reaxc.h
index 5658648db6..91b44be661 100644
--- a/src/USER-REAXC/pair_reaxc.h
+++ b/src/USER-REAXC/pair_reaxc.h
@@ -42,7 +42,7 @@ class PairReaxC : public Pair {
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
-  void init_style();
+  virtual void init_style();
   double init_one(int, int);
   void *extract(const char *, int &);
   int fixbond_flag, fixspecies_flag;