diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
new file mode 100755
index 0000000000..2e56307779
--- /dev/null
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -0,0 +1,1177 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "kokkos.h"
+#include "pair_kokkos.h"
+#include "pair_eam_alloy_kokkos.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+// Cannot use virtual inheritance on the GPU, so must duplicate code
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairEAMAlloyKokkos<DeviceType>::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp)
+{
+  // Kokkos bookkeeping: atom container, execution space, data masks
+  atomKK = static_cast<AtomKokkos *>(atom);
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  // pair-style flags
+  respa_enable = 0;
+  one_coeff = manybody_flag = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairEAMAlloyKokkos<DeviceType>::~PairEAMAlloyKokkos()
+{
+  // nothing is released while copymode is set
+  if (copymode) return;
+  memory->destroy_kokkos(k_eatom,eatom);
+  memory->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ---------------------------------------------------------------------- */
+// compute(): zero rho, run the density (A or AB), embedding (B) and force (C) kernels, then collect energy/virial
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // recreate the per-atom energy/virial arrays whenever they are requested
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+  // sync the atom data the kernels read; flag what they will modify
+  atomKK->sync(execution_space,datamask_read);
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  // grow density (rho) and fp arrays if necessary
+  // need to be atom->nmax in length
+
+  if (atom->nmax > nmax) {
+    nmax = atom->nmax;
+    k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
+    k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax);
+    d_rho = k_rho.d_view;
+    d_fp = k_fp.d_view;
+    h_rho = k_rho.h_view;
+    h_fp = k_fp.h_view;
+  }
+  // cache atom views and counts in members for use inside the kernels
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  v_rho = k_rho.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+  // views of the neighbor list
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+  int inum = list->inum;
+
+  // *this is handed to Kokkos as the functor below; copymode = 1 makes the destructor skip its cleanup (clean_copy() presumably NULLs list allocations -- TODO confirm)
+
+  k_list->clean_copy();
+  copymode = 1;
+
+  // zero out density (ghost atoms included when newton_pair is on)
+
+  if (newton_pair)
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nall),*this);
+  else
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nlocal),*this);
+  DeviceType::fence();
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev;
+
+  // kernel A: accumulate rho (half neighbor lists only)
+
+  if (neighflag == HALF || neighflag == HALFTHREAD) {
+
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALF,1> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALF,0> >(0,inum),*this);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,1> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,0> >(0,inum),*this);
+      }
+    }
+    DeviceType::fence();
+
+    // communicate and sum densities (on the host)
+
+    if (newton_pair) {
+      k_rho.template modify<DeviceType>();
+      k_rho.template sync<LMPHostType>();
+      comm->reverse_comm_pair(this);
+      k_rho.template modify<LMPHostType>();
+      k_rho.template sync<DeviceType>();
+    }
+
+    // kernel B: fp from rho, plus the embedding energy when eflag is set
+
+    if (eflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<0> >(0,inum),*this);
+    DeviceType::fence();
+
+  } else if (neighflag == FULL) {
+
+    // kernel AB: rho and fp in one pass (full neighbor list)
+
+    if (eflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<0> >(0,inum),*this);
+    DeviceType::fence();
+  }
+  // add the embedding energy reduced by kernel B/AB, then clear it ahead of kernel C
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    ev.evdwl = 0.0;
+  }
+
+  // communicate derivative of embedding function (on the device)
+
+  comm->forward_comm_pair(this);
+
+  // kernel C: forces; energy/virial are reduced into ev only when evflag is set
+
+  if (evflag) {
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,0,1> >(0,inum),*this,ev);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,0,1> >(0,inum),*this,ev);
+      }
+    } else if (neighflag == FULL) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,0,1> >(0,inum),*this,ev);
+      }
+    }
+  } else {
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,0,0> >(0,inum),*this);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,0,0> >(0,inum),*this);
+      }
+    } else if (neighflag == FULL) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,0,0> >(0,inum),*this);
+      }
+    }
+  }
+  DeviceType::fence();
+  // add the pair energy and virial reduced by kernel C
+  if (eflag_global) eng_vdwl += ev.evdwl;
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+  // bring per-atom energy/virial back to the host
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::init_style()
+{
+  // the parent class converts the read-in file(s) to arrays, splines them
+  // and makes the neighbor request that is customized below
+  PairEAM::init_style();
+
+  // that request is the last one registered with the neighbor class
+
+  NeighRequest *request = neighbor->requests[neighbor->nrequest - 1];
+  neighflag = lmp->kokkos->neighflag;
+
+  // tag the request as host- or device-resident according to DeviceType
+
+  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+  request->kokkos_host = on_host && !on_device;
+  request->kokkos_device = on_device;
+
+  // only full, half and half/thread lists are accepted;
+  // full and half are mutually exclusive, full_cluster is always off
+
+  if (neighflag != FULL && neighflag != HALF && neighflag != HALFTHREAD) {
+    error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
+    return;
+  }
+
+  request->full = (neighflag == FULL) ? 1 : 0;
+  request->half = (neighflag == FULL) ? 0 : 1;
+  request->full_cluster = 0;
+}
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::file2array()
+{
+  // helper defined elsewhere; presumably fills type2frho/type2rhor/type2z2r read below -- TODO confirm
+  file2array_alloy();
+
+  const int ntypes = atom->ntypes;
+  // dual views are sized ntypes+1 so they can be indexed by 1-based atom type
+  DAT::tdual_int_1d k_type2frho("pair:type2frho",ntypes+1);
+  DAT::tdual_int_2d k_type2rhor("pair:type2rhor",ntypes+1,ntypes+1);
+  DAT::tdual_int_2d k_type2z2r("pair:type2z2r",ntypes+1,ntypes+1);
+  HAT::t_int_1d h_type2frho = k_type2frho.h_view;
+  HAT::t_int_2d h_type2rhor = k_type2rhor.h_view;
+  HAT::t_int_2d h_type2z2r = k_type2z2r.h_view;
+
+  for (int itype = 1; itype <= ntypes; ++itype) {
+    h_type2frho[itype] = type2frho[itype];
+    for (int jtype = 1; jtype <= ntypes; ++jtype) {
+      h_type2rhor(itype,jtype) = type2rhor[itype][jtype];
+      h_type2z2r(itype,jtype) = type2z2r[itype][jtype];
+    }
+  }
+
+  // flag each host copy as modified, sync it to DeviceType, keep its d_view
+  k_type2frho.template modify<LMPHostType>();
+  k_type2frho.template sync<DeviceType>();
+  d_type2frho = k_type2frho.d_view;
+  k_type2rhor.template modify<LMPHostType>();
+  k_type2rhor.template sync<DeviceType>();
+  d_type2rhor = k_type2rhor.d_view;
+  k_type2z2r.template modify<LMPHostType>();
+  k_type2z2r.template sync<DeviceType>();
+  d_type2z2r = k_type2z2r.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::array2spline()
+{
+  // reciprocal grid spacings, used by the kernels for table lookup
+  rdr = 1.0/dr;
+  rdrho = 1.0/drho;
+
+  // one row per tabulated function; nrho+1 / nr+1 points because interpolate() indexes m = 1..n
+  tdual_ffloat_2d_n7 k_frho_spline("pair:frho",nfrho,nrho+1);
+  tdual_ffloat_2d_n7 k_rhor_spline("pair:rhor",nrhor,nr+1);
+  tdual_ffloat_2d_n7 k_z2r_spline("pair:z2r",nz2r,nr+1);
+
+  // frho tables on the rho grid
+  t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view;
+  for (int k = 0; k < nfrho; k++) interpolate(nrho,drho,frho[k],h_frho_spline,k);
+  k_frho_spline.template modify<LMPHostType>();
+  k_frho_spline.template sync<DeviceType>();
+  d_frho_spline = k_frho_spline.d_view;
+
+  // rhor tables on the r grid
+  t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view;
+  for (int k = 0; k < nrhor; k++) interpolate(nr,dr,rhor[k],h_rhor_spline,k);
+  k_rhor_spline.template modify<LMPHostType>();
+  k_rhor_spline.template sync<DeviceType>();
+  d_rhor_spline = k_rhor_spline.d_view;
+
+  // z2r tables on the r grid
+  t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view;
+  for (int k = 0; k < nz2r; k++) interpolate(nr,dr,z2r[k],h_z2r_spline,k);
+  k_z2r_spline.template modify<LMPHostType>();
+  k_z2r_spline.template sync<DeviceType>();
+  d_z2r_spline = k_z2r_spline.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+// interpolate(): fill the 7 coefficients (last index 0..6) of row i of h_spline from the n values f[1..n] on a grid of spacing delta
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i)
+{
+  for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m];
+  // slot 5: slope per grid step -- one-sided at m = 1 and n, centered at m = 2 and n-1, five-point formula in between
+  h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6);
+  h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6));
+  h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6));
+  h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6);
+
+  for (int m = 3; m <= n-2; m++)
+    h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) +
+                    8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0;
+  // slots 4 and 3: quadratic and cubic coefficients of the cubic in p on the interval starting at m
+  for (int m = 1; m <= n-1; m++) {
+    h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) -
+      2.0*h_spline(i,m,5) - h_spline(i,m+1,5);
+    h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) -
+      2.0*(h_spline(i,m+1,6)-h_spline(i,m,6));
+  }
+  // the loop above stops at n-1; the last point gets zero quadratic and cubic terms
+  h_spline(i,n,4) = 0.0;
+  h_spline(i,n,3) = 0.0;
+  // slots 2, 1, 0: derivative coefficients, i.e. slot 5, 2*slot 4 and 3*slot 3 divided by delta
+  for (int m = 1; m <= n; m++) {
+    h_spline(i,m,2) = h_spline(i,m,5)/delta;
+    h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta;
+    h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
+                               int pbc_flag, int *pbc)
+{
+  iswap = iswap_in;  // the members set here are read by the TagPairEAMAlloyPackForwardComm operator()
+  v_buf = buf.view<DeviceType>();
+  d_sendlist = k_sendlist.view<DeviceType>();
+  Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this);
+  DeviceType::fence();
+  return n;
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyPackForwardComm, const int &i) const {
+  // gather fp of the i-th atom on the send list of swap iswap
+  v_buf[i] = d_fp[d_sendlist(iswap, i)];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
+{
+  v_buf = buf.view<DeviceType>();
+  first = first_in;  // offset added by the TagPairEAMAlloyUnpackForwardComm operator()
+  Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this);
+  DeviceType::fence();
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyUnpackForwardComm, const int &i) const
+{ d_fp[first + i] = v_buf[i];  // scatter the i-th buffer entry into fp, offset by first
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
+                               int pbc_flag, int *pbc)
+{
+  // host-side pack: gather h_fp of the n listed atoms into buf
+  // (pbc_flag and pbc are not used)
+
+  for (int k = 0; k < n; ++k)
+    buf[k] = h_fp[list[k]];
+
+  return n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
+{
+  // host-side unpack: the parameter first (not the member) is the offset into h_fp
+  int k = 0;
+  while (k < n) { h_fp[k + first] = buf[k]; k++; }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMAlloyKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
+{
+  // host-side pack: copy h_rho[first .. first+n-1] into buf, return the count
+
+  int npacked = 0;
+  for (; npacked < n; ++npacked)
+    buf[npacked] = h_rho[first + npacked];
+  return npacked;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  // host-side unpack: add each received density onto the listed atom
+  // (accumulates into h_rho rather than overwriting it)
+
+  for (int k = 0; k < n; ++k) {
+    const int j = list[k];
+    h_rho[j] += buf[k];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyInitialize, const int &i) const {
+  d_rho(i) = 0.0;  // reset the density accumulator of atom i
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel A: add to rho the density at atom i = d_ilist[ii] due to its neighbors, and at each neighbor j due to i
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelA<NEIGHFLAG,NEWTON_PAIR>, const int &ii) const {
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  // view of rho whose memory traits come from AtomicF<NEIGHFLAG> (atomic for the Half/Thread neighbor style)
+  Kokkos::View<F_FLOAT*, typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > rho = v_rho;
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  // the d_numneigh[i] neighbors of i are read from row i of d_neighbors
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT rhotmp = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    // keep only the NEIGHMASK bits of the stored neighbor index
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    // only pairs inside the cutoff contribute
+    if (rsq < cutforcesq) {
+      F_FLOAT p = sqrt(rsq)*rdr + 1.0; // table coordinate: m = grid index (capped at nr-1), p = offset within the interval (capped at 1)
+      int m = static_cast<int> (p);
+      m = MIN(m,nr-1);
+      p -= m;
+      p = MIN(p,1.0);
+      const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+      rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
+                  d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
+      if (NEWTON_PAIR || j < nlocal) { // also credit j, unless newton_pair is off and j >= nlocal
+        const int d_type2rhor_ij = d_type2rhor(itype,jtype);
+        rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
+                    d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
+      }
+    }
+
+  }
+  rho[i] += rhotmp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel B (templated on EFLAG): compute fp for atom d_ilist[ii] and, when EFLAG is set, tally its embedding energy
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // if rho > rhomax (e.g. due to close approach of two atoms),
+  //   will exceed table, so add linear term to conserve energy
+
+  const int i = d_ilist[ii];
+  const int itype = type(i);
+  // locate rho in the frho table: m clamped to [1,nrho-1], p = offset capped at 1
+  F_FLOAT p = d_rho[i]*rdrho + 1.0;
+  int m = static_cast<int> (p);
+  m = MAX(1,MIN(m,nrho-1));
+  p -= m;
+  p = MIN(p,1.0);
+  const int d_type2frho_i = d_type2frho[itype];
+  d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
+  if (EFLAG) {
+    F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
+                    d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
+    if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
+    if (eflag_global) ev.evdwl += phi;
+    if (eflag_atom) d_eatom[i] += phi;
+  }
+
+}
+
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelB<EFLAG>, const int &ii) const {
+  EV_FLOAT scratch; // parallel_for variant: tallies go to a local that is discarded
+  (*this).template operator()<EFLAG>(TagPairEAMAlloyKernelB<EFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel AB (templated on EFLAG): density and embedding terms for atom d_ilist[ii] in one pass; only entries of atom i are written
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  // the d_numneigh[i] neighbors of i are read from row i of d_neighbors
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT rhotmp = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    // keep only the NEIGHMASK bits of the stored neighbor index
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    // only pairs inside the cutoff contribute; nothing is added to rho of j here
+    if (rsq < cutforcesq) {
+      F_FLOAT p = sqrt(rsq)*rdr + 1.0;
+      int m = static_cast<int> (p);
+      m = MIN(m,nr-1);
+      p -= m;
+      p = MIN(p,1.0);
+      const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+      rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
+                  d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
+    }
+
+  }
+  d_rho[i] += rhotmp;
+
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // if rho > rhomax (e.g. due to close approach of two atoms),
+  //   will exceed table, so add linear term to conserve energy
+
+  F_FLOAT p = d_rho[i]*rdrho + 1.0;
+  int m = static_cast<int> (p);
+  m = MAX(1,MIN(m,nrho-1));
+  p -= m;
+  p = MIN(p,1.0);
+  const int d_type2frho_i = d_type2frho[itype];
+  d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
+  if (EFLAG) {
+    F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
+                    d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
+    if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
+    if (eflag_global) ev.evdwl += phi;
+    if (eflag_atom) d_eatom[i] += phi;
+  }
+
+}
+
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int &ii) const {
+  EV_FLOAT scratch; // parallel_for variant: tallies go to a local that is discarded
+  (*this).template operator()<EFLAG>(TagPairEAMAlloyKernelAB<EFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel C (templated on NEIGHFLAG, NEWTON_PAIR, EVFLAG): forces on atom d_ilist[ii] and its neighbors; tallies energy/virial into ev when EVFLAG is set
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // view of f whose memory traits come from AtomicF<NEIGHFLAG> (atomic for the Half/Thread neighbor style)
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  // the d_numneigh[i] neighbors of i are read from row i of d_neighbors
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT fxtmp = 0.0;
+  F_FLOAT fytmp = 0.0;
+  F_FLOAT fztmp = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    // keep only the NEIGHMASK bits of the stored neighbor index
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    // only pairs inside the cutoff interact
+    if(rsq < cutforcesq) {
+      const F_FLOAT r = sqrt(rsq);
+      F_FLOAT p = r*rdr + 1.0;
+      int m = static_cast<int> (p);
+      m = MIN(m,nr-1);
+      p -= m;
+      p = MIN(p,1.0);
+
+      // rhoip = derivative of (density at atom j due to atom i)
+      // rhojp = derivative of (density at atom i due to atom j)
+      // phi = pair potential energy
+      // phip = phi'
+      // z2 = phi * r
+      // z2p = (phi * r)' = (phi' r) + phi
+      // psip needs both fp[i] and fp[j] terms since r_ij appears in two
+      //   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+      //   hence embed' = Fi'(sum rho_ij) rhojp + Fj'(sum rho_ji) rhoip
+
+      const int d_type2rhor_ij = d_type2rhor(itype,jtype);
+      const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p +
+                             d_rhor_spline(d_type2rhor_ij,m,2);
+      const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+      const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p +
+                             d_rhor_spline(d_type2rhor_ji,m,2);
+      const int d_type2z2r_ij = d_type2z2r(itype,jtype);
+      const F_FLOAT z2p = (d_z2r_spline(d_type2z2r_ij,m,0)*p + d_z2r_spline(d_type2z2r_ij,m,1))*p +
+                           d_z2r_spline(d_type2z2r_ij,m,2);
+      const F_FLOAT z2 = ((d_z2r_spline(d_type2z2r_ij,m,3)*p + d_z2r_spline(d_type2z2r_ij,m,4))*p +
+                           d_z2r_spline(d_type2z2r_ij,m,5))*p + d_z2r_spline(d_type2z2r_ij,m,6);
+
+      const F_FLOAT recip = 1.0/r;
+      const F_FLOAT phi = z2*recip;
+      const F_FLOAT phip = z2p*recip - phi*recip;
+      const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip;
+      const F_FLOAT fpair = -psip*recip;
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      // with a half list the opposite force on j is applied here as well
+      if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
+        a_f(j,0) -= delx*fpair;
+        a_f(j,1) -= dely*fpair;
+        a_f(j,2) -= delz*fpair;
+      }
+      // pair energy: full weight when the force on j was applied above, half weight otherwise
+      if (EVFLAG) {
+        if (eflag) {
+          ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*phi;
+        }
+
+        if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,phi,fpair,delx,dely,delz);
+      }
+
+    }
+  }
+
+  a_f(i,0) += fxtmp;
+  a_f(i,1) += fytmp;
+  a_f(i,2) += fztmp;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
+  EV_FLOAT scratch; // parallel_for variant: tallies go to a local that is discarded
+  (*this).template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+// ev_tally(): add the energy/virial of one (i,j) interaction to the per-atom arrays and to ev.v (ev.evdwl is not touched here)
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  const int EFLAG = eflag;
+  const int VFLAG = vflag_either;
+
+  // views of eatom and vatom whose memory traits come from AtomicF<NEIGHFLAG> (atomic for the Half/Thread neighbor style)
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // per-atom energy: half of epair to each of i and j for a half list, to i only for a full list
+  if (EFLAG) {
+    if (eflag_atom) {
+      const E_FLOAT epairhalf = 0.5 * epair;
+      if (NEIGHFLAG!=FULL) {
+        if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
+      } else {
+        v_eatom[i] += epairhalf;
+      }
+    }
+  }
+  // six virial components, in the order xx, yy, zz, xy, xz, yz
+  if (VFLAG) {
+    const E_FLOAT v0 = delx*delx*fpair;
+    const E_FLOAT v1 = dely*dely*fpair;
+    const E_FLOAT v2 = delz*delz*fpair;
+    const E_FLOAT v3 = delx*dely*fpair;
+    const E_FLOAT v4 = delx*delz*fpair;
+    const E_FLOAT v5 = dely*delz*fpair;
+    // global virial: one half-contribution each for i and j with a half list, a single half-contribution with a full list
+    if (vflag_global) {
+      if (NEIGHFLAG!=FULL) {
+        if (NEWTON_PAIR || i < nlocal) {
+          ev.v[0] += 0.5*v0;
+          ev.v[1] += 0.5*v1;
+          ev.v[2] += 0.5*v2;
+          ev.v[3] += 0.5*v3;
+          ev.v[4] += 0.5*v4;
+          ev.v[5] += 0.5*v5;
+        }
+        if (NEWTON_PAIR || j < nlocal) {
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+        }
+      } else {
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+      }
+    }
+    // per-atom virial, split between i and j the same way
+    if (vflag_atom) {
+      if (NEIGHFLAG!=FULL) {
+        if (NEWTON_PAIR || i < nlocal) {
+          v_vatom(i,0) += 0.5*v0;
+          v_vatom(i,1) += 0.5*v1;
+          v_vatom(i,2) += 0.5*v2;
+          v_vatom(i,3) += 0.5*v3;
+          v_vatom(i,4) += 0.5*v4;
+          v_vatom(i,5) += 0.5*v5;
+        }
+        if (NEWTON_PAIR || j < nlocal) {
+        v_vatom(j,0) += 0.5*v0;
+        v_vatom(j,1) += 0.5*v1;
+        v_vatom(j,2) += 0.5*v2;
+        v_vatom(j,3) += 0.5*v3;
+        v_vatom(j,4) += 0.5*v4;
+        v_vatom(j,5) += 0.5*v5;
+        }
+      } else {
+        v_vatom(i,0) += 0.5*v0;
+        v_vatom(i,1) += 0.5*v1;
+        v_vatom(i,2) += 0.5*v2;
+        v_vatom(i,3) += 0.5*v3;
+        v_vatom(i,4) += 0.5*v4;
+        v_vatom(i,5) += 0.5*v5;
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Duplicate PairEAMAlloy functions
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   read DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  // expected form: * * <setfl file> <element name or NULL for each atom type>
+  const int ntypes = atom->ntypes;
+  if (narg != 3 + ntypes)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // ensure I,J args are * *
+  const bool wildcards = strcmp(arg[0],"*") == 0 && strcmp(arg[1],"*") == 0;
+  if (!wildcards)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // drop any setfl data left from an earlier call, then read the file
+  if (setfl) {
+    for (int e = 0; e < setfl->nelements; e++) delete [] setfl->elements[e];
+    delete [] setfl->elements;
+    delete [] setfl->mass;
+    memory->destroy(setfl->frho);
+    memory->destroy(setfl->rhor);
+    memory->destroy(setfl->z2r);
+    delete setfl;
+  }
+  setfl = new Setfl();
+  read_file(arg[2]);
+
+  // map[itype] = index of the file element assigned to atom type itype,
+  // -1 if the argument is NULL
+  for (int iarg = 3; iarg < narg; iarg++) {
+    const int itype = iarg - 2;
+    if (strcmp(arg[iarg],"NULL") == 0) {
+      map[itype] = -1;
+      continue;
+    }
+    int ielem = 0;
+    while (ielem < setfl->nelements && strcmp(arg[iarg],setfl->elements[ielem]) != 0)
+      ielem++;
+    if (ielem >= setfl->nelements)
+      error->all(FLERR,"No matching element in EAM potential file");
+    else map[itype] = ielem;
+  }
+
+  // clear setflag since coeff() called once with I,J = * *
+
+  for (int itype = 1; itype <= ntypes; itype++)
+    for (int jtype = itype; jtype <= ntypes; jtype++)
+      setflag[itype][jtype] = 0;
+
+  // flag each pair itype <= jtype whose types are both mapped to elements
+  // and set the mass of a type when it is paired with itself
+
+  int npairs = 0;
+  for (int itype = 1; itype <= ntypes; itype++) {
+    if (map[itype] < 0) continue;
+    for (int jtype = itype; jtype <= ntypes; jtype++) {
+      if (map[jtype] < 0) continue;
+      setflag[itype][jtype] = 1;
+      if (itype == jtype) atom->set_mass(itype,setfl->mass[map[itype]]);
+      npairs++;
+    }
+  }
+
+  if (npairs == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   read a multi-element DYNAMO setfl file into the Setfl struct "setfl"
+   filename = name of the potential file, opened via force->open_potential()
+   only proc 0 touches the file, every value read is broadcast to all procs
+   allocates setfl->elements, mass, frho, rhor and z2r
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::read_file(char *filename)
+{
+  Setfl *file = setfl;
+
+  // open potential file
+
+  int me = comm->me;
+  FILE *fptr;
+  char line[MAXLINE];
+
+  if (me == 0) {
+    fptr = force->open_potential(filename);
+    if (fptr == NULL) {
+      // bounded formatting: an over-long file name is truncated in the
+      // message instead of overrunning the fixed-size buffer
+      char str[128];
+      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);
+      error->one(FLERR,str);
+    }
+  }
+
+  // read and broadcast header
+  // the first three lines are read and discarded
+  // extract element names from nelements line (the fourth line)
+
+  int n;
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    n = strlen(line) + 1;
+  }
+  MPI_Bcast(&n,1,MPI_INT,0,world);
+  MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+  // line must hold the element count followed by exactly that many names
+
+  sscanf(line,"%d",&file->nelements);
+  int nwords = atom->count_words(line);
+  if (nwords != file->nelements + 1)
+    error->all(FLERR,"Incorrect element names in EAM potential file");
+
+  // tokenize in place: first token (the count) is skipped,
+  // words[] collects the names plus the terminating NULL from strtok
+
+  char **words = new char*[file->nelements+1];
+  nwords = 0;
+  strtok(line," \t\n\r\f");
+  while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
+
+  // keep private copies of the names, words[] points into "line"
+
+  file->elements = new char*[file->nelements];
+  for (int i = 0; i < file->nelements; i++) {
+    n = strlen(words[i]) + 1;
+    file->elements[i] = new char[n];
+    strcpy(file->elements[i],words[i]);
+  }
+  delete [] words;
+
+  // grid definition line: nrho drho nr dr cutoff
+
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    sscanf(line,"%d %lg %d %lg %lg",
+           &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
+  }
+
+  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
+  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
+  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
+
+  // tables are allocated with one extra entry, values are stored from index 1
+
+  file->mass = new double[file->nelements];
+  memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
+  memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
+  memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
+                 "pair:z2r");
+
+  // per-element section: header line (only the mass, its 2nd field, is kept),
+  // then nrho frho values, then nr rhor values
+
+  int i,j,tmp;
+  for (i = 0; i < file->nelements; i++) {
+    if (me == 0) {
+      fgets(line,MAXLINE,fptr);
+      sscanf(line,"%d %lg",&tmp,&file->mass[i]);
+    }
+    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
+
+    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
+    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
+    if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
+    MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
+  }
+
+  // pair section: nr z2r values for every element pair with j <= i
+  // entries with j > i are never filled
+
+  for (i = 0; i < file->nelements; i++)
+    for (j = 0; j <= i; j++) {
+      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
+      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+
+  // close the potential file
+
+  if (me == 0) fclose(fptr);
+}
+
+/* ----------------------------------------------------------------------
+   transfer the tables of the read-in setfl file into the standard
+   frho/rhor/z2r arrays and build the type2frho/type2rhor/type2z2r maps
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
+{
+  const int ntypes = atom->ntypes;
+  const int nelements = setfl->nelements;
+
+  // grid sizes and spacings are taken over unchanged from the setfl file
+
+  nrho = setfl->nrho;
+  drho = setfl->drho;
+  nr = setfl->nr;
+  dr = setfl->dr;
+  rhomax = (nrho-1) * drho;
+
+  // ------------------------------------------------------------------
+  // frho: one table per setfl element plus a trailing table of zeroes
+  // ------------------------------------------------------------------
+
+  nfrho = nelements + 1;
+  memory->destroy(frho);
+  memory->create(frho,nfrho,nrho+1,"pair:frho");
+
+  for (int ielem = 0; ielem < nelements; ielem++) {
+    for (int k = 1; k <= nrho; k++)
+      frho[ielem][k] = setfl->frho[ielem][k];
+  }
+
+  // the all-zero table is what non-EAM types (pair hybrid) point to,
+  // needed because fp is still computed for non-EAM atoms
+
+  for (int k = 1; k <= nrho; k++) frho[nfrho-1][k] = 0.0;
+
+  // type2frho[type] = frho table (0 to nfrho-1) used by that atom type
+  // unmapped types are sent to the zero table at the end
+
+  for (int itype = 1; itype <= ntypes; itype++)
+    type2frho[itype] = (map[itype] >= 0) ? map[itype] : nfrho-1;
+
+  // ------------------------------------------------------------------
+  // rhor: one table per setfl element
+  // ------------------------------------------------------------------
+
+  nrhor = nelements;
+  memory->destroy(rhor);
+  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+
+  for (int ielem = 0; ielem < nelements; ielem++) {
+    for (int k = 1; k <= nr; k++)
+      rhor[ielem][k] = setfl->rhor[ielem][k];
+  }
+
+  // type2rhor[i][j] = rhor table (0 to nrhor-1) for the type pair
+  // with setfl files the choice depends on I alone
+  // a value of -1 (non-EAM atom in pair hybrid) is fine, it is never used
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    const int itable = map[itype];
+    for (int jtype = 1; jtype <= ntypes; jtype++)
+      type2rhor[itype][jtype] = itable;
+  }
+
+  // ------------------------------------------------------------------
+  // z2r: one table per unordered element pair, N*(N+1)/2 in total
+  // ------------------------------------------------------------------
+
+  nz2r = nelements * (nelements+1) / 2;
+  memory->destroy(z2r);
+  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+
+  // pack the lower triangle (I >= J) row by row into consecutive tables
+
+  int itable = 0;
+  for (int ielem = 0; ielem < nelements; ielem++) {
+    for (int jelem = 0; jelem <= ielem; jelem++, itable++) {
+      for (int k = 1; k <= nr; k++)
+        z2r[itable][k] = setfl->z2r[ielem][jelem][k];
+    }
+  }
+
+  // type2z2r[i][j] = z2r table (0 to nz2r-1) for the type pair
+  // table index = number of entries in the rows above irow, plus icol,
+  //   where irow >= icol are the two element indices (swapped if needed)
+  // if either type is unmapped (map = -1, non-EAM atom in pair hybrid):
+  //   type2z2r is not used by non-opt
+  //   but is set to 0 since it is accessed by opt
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = 1; jtype <= ntypes; jtype++) {
+      const int ielem = map[itype];
+      const int jelem = map[jtype];
+      if (ielem == -1 || jelem == -1) {
+        type2z2r[itype][jtype] = 0;
+        continue;
+      }
+      const int irow = (ielem < jelem) ? jelem : ielem;
+      const int icol = (ielem < jelem) ? ielem : jelem;
+      int offset = 0;
+      for (int row = 1; row <= irow; row++) offset += row;
+      offset += icol;
+      type2z2r[itype][jtype] = offset;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template class PairEAMAlloyKokkos<LMPDeviceType>;  // explicit instantiation for the device type (always built)
+#ifdef KOKKOS_HAVE_CUDA  // host-type instantiation is added only when KOKKOS_HAVE_CUDA is defined
+template class PairEAMAlloyKokkos<LMPHostType>;  // NOTE(review): presumably LMPHostType == LMPDeviceType without CUDA - confirm
+#endif
\ No newline at end of file
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h
new file mode 100755
index 0000000000..2d48f0fde5
--- /dev/null
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.h
@@ -0,0 +1,183 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/alloy/kk,PairEAMAlloyKokkos<LMPDeviceType>)  // plain /kk suffix uses the same instantiation as /kk/device
+PairStyle(eam/alloy/kk/device,PairEAMAlloyKokkos<LMPDeviceType>)
+PairStyle(eam/alloy/kk/host,PairEAMAlloyKokkos<LMPHostType>)  // /kk/host suffix uses the LMPHostType instantiation
+
+#else
+
+#ifndef LMP_PAIR_EAM_ALLOY_KOKKOS_H
+#define LMP_PAIR_EAM_ALLOY_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_eam.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+struct TagPairEAMAlloyPackForwardComm{};  // empty tag types: each one selects the PairEAMAlloyKokkos::operator() overload taking it as first argument
+struct TagPairEAMAlloyUnpackForwardComm{};
+struct TagPairEAMAlloyInitialize{};
+
+template<int NEIGHFLAG, int NEWTON_PAIR>
+struct TagPairEAMAlloyKernelA{};  // kernel A tag, parameterized on neighbor list flag and newton_pair setting
+
+template<int EFLAG>
+struct TagPairEAMAlloyKernelB{};  // kernel B tag, EFLAG is a compile-time energy switch
+
+template<int EFLAG>
+struct TagPairEAMAlloyKernelAB{};  // kernel AB tag, EFLAG is a compile-time energy switch
+
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+struct TagPairEAMAlloyKernelC{};  // kernel C tag, additionally parameterized on an energy/virial switch
+
+/* Cannot use virtual inheritance on the GPU
+
+   PairEAMAlloyKokkos: Kokkos implementation of pair style eam/alloy,
+   templated on the Kokkos device type and derived directly from PairEAM.
+   The object itself serves as the kernel functor: every tag type
+   declared above has a matching operator() overload below.
+*/
+
+template<class DeviceType>
+class PairEAMAlloyKokkos : public PairEAM {
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};  // neighbor list flavors this style accepts
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;  // reduction value type of the operator() overloads taking EV_FLOAT&
+
+  PairEAMAlloyKokkos(class LAMMPS *);
+  virtual ~PairEAMAlloyKokkos();
+  virtual void compute(int, int);
+  void init_style();
+  void coeff(int, char **);  // args: narg, arg (pair_coeff arguments)
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyPackForwardComm, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyUnpackForwardComm, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyInitialize, const int&) const;
+  
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelA<NEIGHFLAG,NEWTON_PAIR>, const int&) const;
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelB<EFLAG>, const int&, EV_FLOAT&) const;  // variant with an EV_FLOAT accumulator
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelB<EFLAG>, const int&) const;  // variant without accumulator
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;  // tallies energy/virial of pair i,j; writes per-atom virial halves into v_vatom
+
+  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, 
+                               int, int *);
+  virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
+  virtual int pack_forward_comm(int, int *, double *, int, int *);  // raw-buffer counterparts of the *_kokkos comm routines
+  virtual void unpack_forward_comm(int, int, double *);
+  int pack_reverse_comm(int, int, double *);
+  void unpack_reverse_comm(int, int *, double *);
+
+ protected:
+  void cleanup_copy();
+
+  typename AT::t_x_array_randomread x;  // per-atom views; NOTE(review): presumably positions/forces/types/tags set in compute() - confirm
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_tagint_1d tag;
+
+  DAT::tdual_efloat_1d k_eatom;  // per-atom energy/virial storage; NOTE(review): k_* look like dual views with d_* as device side - confirm
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+
+  DAT::tdual_ffloat_1d k_rho;  // rho and fp storage in several view flavors (k_, d_, v_, h_)
+  DAT::tdual_ffloat_1d k_fp;
+  DAT::t_ffloat_1d d_rho;
+  typename AT::t_ffloat_1d v_rho;
+  DAT::t_ffloat_1d d_fp;
+  HAT::t_ffloat_1d h_rho;
+  HAT::t_ffloat_1d h_fp;
+
+  DAT::t_int_1d_randomread d_type2frho;  // read-only view counterparts of the type2frho/type2rhor/type2z2r maps
+  DAT::t_int_2d_randomread d_type2rhor;
+  DAT::t_int_2d_randomread d_type2z2r;
+
+  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;  // (table, grid point, 7 coefficients)
+  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+
+  t_ffloat_2d_n7_randomread d_frho_spline;  // read-only spline coefficient tables
+  t_ffloat_2d_n7_randomread d_rhor_spline;
+  t_ffloat_2d_n7_randomread d_z2r_spline;
+
+  virtual void file2array();
+  void file2array_alloy();  // copies the setfl tables into frho/rhor/z2r and builds the type2* maps
+  void array2spline();
+  void interpolate(int, double, double *, t_host_ffloat_2d_n7, int);
+  void read_file(char *);  // reads the setfl file named by the argument into setfl
+
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;  // neighbor list views
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+  //NeighListKokkos<DeviceType> k_list;
+
+  int iswap;  // NOTE(review): iswap/first/d_sendlist/v_buf presumably carry the forward-comm arguments into the pack/unpack kernels - confirm
+  int first;
+  typename AT::t_int_2d d_sendlist;
+  typename AT::t_xfloat_1d_um v_buf;
+
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  friend void pair_virial_fdotr_compute<PairEAMAlloyKokkos>(PairEAMAlloyKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Cannot use chosen neighbor list style with pair eam/kk/alloy
+
+That style is not supported by Kokkos.
+
+*/
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
new file mode 100755
index 0000000000..a95deb8542
--- /dev/null
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -0,0 +1,1186 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "kokkos.h"
+#include "pair_kokkos.h"
+#include "pair_eam_fs_kokkos.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+// Cannot use virtual inheritance on the GPU, so must duplicate code
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairEAMFSKokkos<DeviceType>::PairEAMFSKokkos(LAMMPS *lmp) : PairEAM(lmp)
+{
+  // pair style capability flags
+
+  respa_enable = 0;
+  manybody_flag = 1;
+  one_coeff = 1;
+
+  // Kokkos bookkeeping: execution space of this instantiation and
+  // the atom data masks that compute() syncs and marks as modified
+
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  atomKK = (AtomKokkos *) atom;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | datamask_modify;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairEAMFSKokkos<DeviceType>::~PairEAMFSKokkos()
+{
+  // compute() sets copymode while it hands *this to the Kokkos kernels;
+  // an object destroyed in that state must leave the per-atom arrays alone
+
+  if (copymode) return;
+
+  memory->destroy_kokkos(k_eatom,eatom);
+  memory->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ----------------------------------------------------------------------
+   compute forces and, if requested, energy and virial
+   eflag_in,vflag_in = energy/virial request flags, stored in eflag/vflag
+   sequence: zero rho -> kernel A (density; half lists) + reverse comm of
+   rho when newton_pair is on -> kernel B (fp and embedding energy), or
+   the fused kernel AB for full lists -> forward comm of fp -> kernel C
+   this object is passed to Kokkos as the functor for every kernel
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);  // only forces change when no energy/virial is tallied
+
+  // grow energy and fp arrays if necessary
+  // need to be atom->nmax in length
+
+  if (atom->nmax > nmax) {
+    nmax = atom->nmax;
+    k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
+    k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax);
+    d_rho = k_rho.d_view;
+    d_fp = k_fp.d_view;
+    h_rho = k_rho.h_view;
+    h_fp = k_fp.h_view;
+  }
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  v_rho = k_rho.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+  int inum = list->inum;
+
+  // Call clean_copy on the neighbor list, then flag copy mode: while copymode
+  // is set the destructor of this class does not free the per-atom arrays
+
+  k_list->clean_copy();
+  copymode = 1;
+
+  // zero out density
+  // with newton_pair on, ghost atoms accumulate density too, so clear all atoms
+
+  if (newton_pair)
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this);
+  else
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this);
+  DeviceType::fence();
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev;
+
+  // compute kernel A
+
+  if (neighflag == HALF || neighflag == HALFTHREAD) {
+
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALF,1> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALF,0> >(0,inum),*this);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,1> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this);
+      }
+    }
+    DeviceType::fence();
+
+    // communicate and sum densities (on the host)
+    // rho is moved to the host for the reverse comm and back afterwards
+
+    if (newton_pair) {
+      k_rho.template modify<DeviceType>();
+      k_rho.template sync<LMPHostType>();
+      comm->reverse_comm_pair(this);
+      k_rho.template modify<LMPHostType>();
+      k_rho.template sync<DeviceType>();
+    }
+
+    // compute kernel B
+    // a reduction is only needed when energy is requested
+
+    if (eflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this);
+    DeviceType::fence();
+
+  } else if (neighflag == FULL) {
+
+    // compute kernel AB
+
+    if (eflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this);
+    DeviceType::fence();
+  }
+
+  // bank the energy reduced so far and reset it, ev is reused by kernel C
+
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    ev.evdwl = 0.0;
+  }
+
+  // communicate derivative of embedding function (on the device)
+
+  comm->forward_comm_pair(this);
+
+  // compute kernel C
+  // last template argument of the tag = evflag, reduction only when it is set
+
+  if (evflag) {
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALF,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALF,0,1> >(0,inum),*this,ev);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALFTHREAD,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALFTHREAD,0,1> >(0,inum),*this,ev);
+      }
+    } else if (neighflag == FULL) {
+      if (newton_pair) {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<FULL,1,1> >(0,inum),*this,ev);
+      } else {
+        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<FULL,0,1> >(0,inum),*this,ev);
+      }
+    }
+  } else {
+    if (neighflag == HALF) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALF,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALF,0,0> >(0,inum),*this);
+      }
+    } else if (neighflag == HALFTHREAD) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALFTHREAD,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<HALFTHREAD,0,0> >(0,inum),*this);
+      }
+    } else if (neighflag == FULL) {
+      if (newton_pair) {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<FULL,1,0> >(0,inum),*this);
+      } else {
+        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelC<FULL,0,0> >(0,inum),*this);
+      }
+    }
+  }
+  DeviceType::fence();
+
+  if (eflag_global) eng_vdwl += ev.evdwl;  // energy contribution reduced by kernel C
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  if (eflag_atom) {  // mark per-atom energies as changed in DeviceType space and sync the host side
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {  // same for per-atom virials
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;  // kernels are done, normal destructor behavior again
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   runs the PairEAM initialization, then adapts the neighbor list
+   request to the Kokkos device type and neighbor list flavor
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::init_style()
+{
+  // parent class converts read-in file(s) to arrays and splines them
+
+  PairEAM::init_style();
+
+  neighflag = lmp->kokkos->neighflag;
+
+  // the neigh request made by the parent class is the last one in the list
+
+  NeighRequest *request = neighbor->requests[neighbor->nrequest - 1];
+
+  // request a host list only when this instantiation is host-only
+
+  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+  request->kokkos_host = on_host && !on_device;
+  request->kokkos_device = on_device;
+
+  // full or half list, anything else is not supported by this style
+
+  const bool want_full = (neighflag == FULL);
+  const bool want_half = (neighflag == HALF || neighflag == HALFTHREAD);
+
+  if (want_full || want_half) {
+    request->full = want_full ? 1 : 0;
+    request->half = want_full ? 0 : 1;
+    request->full_cluster = 0;
+  } else {
+    error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
+  }
+
+}
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::file2array()
+{
+  // build the plain type2frho/type2rhor/type2z2r arrays first
+
+  file2array_fs();
+
+  const int ntypes = atom->ntypes;
+
+  // dual views with ntypes+1 entries per dimension, indexed by atom type
+
+  DAT::tdual_int_1d k_type2frho("pair:type2frho",ntypes+1);
+  DAT::tdual_int_2d k_type2rhor("pair:type2rhor",ntypes+1,ntypes+1);
+  DAT::tdual_int_2d k_type2z2r("pair:type2z2r",ntypes+1,ntypes+1);
+
+  HAT::t_int_1d h_type2frho = k_type2frho.h_view;
+  HAT::t_int_2d h_type2rhor = k_type2rhor.h_view;
+  HAT::t_int_2d h_type2z2r = k_type2z2r.h_view;
+
+  // mirror the maps into the host views, entry 0 is left untouched
+
+  for (int itype = 1; itype <= ntypes; itype++)
+    h_type2frho[itype] = type2frho[itype];
+
+  for (int itype = 1; itype <= ntypes; itype++) {
+    for (int jtype = 1; jtype <= ntypes; jtype++) {
+      h_type2rhor(itype,jtype) = type2rhor[itype][jtype];
+      h_type2z2r(itype,jtype) = type2z2r[itype][jtype];
+    }
+  }
+
+  // push the host data to the DeviceType side
+
+  k_type2frho.template modify<LMPHostType>();
+  k_type2frho.template sync<DeviceType>();
+  k_type2rhor.template modify<LMPHostType>();
+  k_type2rhor.template sync<DeviceType>();
+  k_type2z2r.template modify<LMPHostType>();
+  k_type2z2r.template sync<DeviceType>();
+
+  // the kernels read the maps through these views
+
+  d_type2frho = k_type2frho.d_view;
+  d_type2rhor = k_type2rhor.d_view;
+  d_type2z2r = k_type2z2r.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::array2spline()
+{
+  // inverse grid spacings
+
+  rdrho = 1.0/drho;
+  rdr = 1.0/dr;
+
+  // one dual view of spline coefficients per family of tables
+
+  tdual_ffloat_2d_n7 k_frho_spline("pair:frho",nfrho,nrho+1);
+  tdual_ffloat_2d_n7 k_rhor_spline("pair:rhor",nrhor,nr+1);
+  tdual_ffloat_2d_n7 k_z2r_spline("pair:z2r",nz2r,nr+1);
+
+  t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view;
+  t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view;
+  t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view;
+
+  // spline every table on the host, then sync that family to DeviceType
+
+  for (int itable = 0; itable < nfrho; itable++) {
+    interpolate(nrho,drho,frho[itable],h_frho_spline,itable);
+  }
+  k_frho_spline.template modify<LMPHostType>();
+  k_frho_spline.template sync<DeviceType>();
+
+  for (int itable = 0; itable < nrhor; itable++) {
+    interpolate(nr,dr,rhor[itable],h_rhor_spline,itable);
+  }
+  k_rhor_spline.template modify<LMPHostType>();
+  k_rhor_spline.template sync<DeviceType>();
+
+  for (int itable = 0; itable < nz2r; itable++) {
+    interpolate(nr,dr,z2r[itable],h_z2r_spline,itable);
+  }
+  k_z2r_spline.template modify<LMPHostType>();
+  k_z2r_spline.template sync<DeviceType>();
+
+  // read-only views used by the kernels
+
+  d_frho_spline = k_frho_spline.d_view;
+  d_rhor_spline = k_rhor_spline.d_view;
+  d_z2r_spline = k_z2r_spline.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   fill row i of h_spline with spline coefficients of one tabulated function
+   n = number of grid points, delta = grid spacing
+   f = tabulated values, read at indices 1..n
+   for grid point m the 7 columns hold:
+     6 = f[m]
+     5 = finite-difference slope estimate, per grid interval
+     4,3 = quadratic and cubic coefficients on the interval m..m+1
+     2,1,0 = columns 5,4,3 multiplied by 1/delta, 2/delta, 3/delta
+   the kernels evaluate ((c3*p + c4)*p + c5)*p + c6 for the value and
+   (c0*p + c1)*p + c2 for the derivative
+   NOTE(review): points 3 and n-2 are indexed unconditionally, so this
+   appears to assume n >= 3 - confirm
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i)
+{
+  for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m];
+
+  h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6);  // one-sided difference at the first point
+  h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6));  // centered difference next to the boundary
+  h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6));
+  h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6);  // one-sided difference at the last point
+
+  for (int m = 3; m <= n-2; m++)  // interior points: five-point stencil using m-2..m+2
+    h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) +
+                    8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0;
+
+  for (int m = 1; m <= n-1; m++) {  // higher-order coefficients from values and slopes at m and m+1
+    h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) -
+      2.0*h_spline(i,m,5) - h_spline(i,m+1,5);
+    h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) -
+      2.0*(h_spline(i,m+1,6)-h_spline(i,m,6));
+  }
+
+  h_spline(i,n,4) = 0.0;  // no interval beyond the last point
+  h_spline(i,n,3) = 0.0;
+
+  for (int m = 1; m <= n; m++) {  // derivative coefficients, scaled by the grid spacing
+    h_spline(i,m,2) = h_spline(i,m,5)/delta;
+    h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta;
+    h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
+                               int pbc_flag, int *pbc)
+{
+  // the pack kernel reads its inputs from members of this functor
+
+  d_sendlist = k_sendlist.view<DeviceType>();
+  iswap = iswap_in;
+  v_buf = buf.view<DeviceType>();
+
+  // one work item per buffer entry, n values are packed
+
+  typedef Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm> pack_policy;
+  Kokkos::parallel_for(pack_policy(0,n),*this);
+  DeviceType::fence();
+
+  return n;
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSPackForwardComm, const int &i) const {
+  // buffer slot i receives fp of the i-th atom in the send list of swap iswap
+  const int iatom = d_sendlist(iswap, i);
+  v_buf[i] = d_fp[iatom];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
+{
+  // the unpack kernel reads its inputs from members of this functor
+
+  first = first_in;
+  v_buf = buf.view<DeviceType>();
+
+  // one work item per received value
+
+  typedef Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm> unpack_policy;
+  Kokkos::parallel_for(unpack_policy(0,n),*this);
+  DeviceType::fence();
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSUnpackForwardComm, const int &i) const {
+  // received values are stored contiguously, starting at atom index first
+  const int iatom = i + first;
+  d_fp[iatom] = v_buf[i];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMFSKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
+                               int pbc_flag, int *pbc)
+{
+  // gather fp of the n listed atoms from the host view into buf
+
+  for (int k = 0; k < n; k++)
+    buf[k] = h_fp[list[k]];
+
+  return n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
+{
+  // store the n received fp values for atoms first .. first+n-1
+
+  const int last = first + n;
+  for (int iatom = first; iatom < last; iatom++)
+    h_fp[iatom] = buf[iatom - first];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int PairEAMFSKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
+{
+  // copy rho of atoms first .. first+n-1 from the host view into buf
+  // returns the number of values packed
+
+  int count = 0;
+  for (int iatom = first; iatom < first + n; iatom++, count++)
+    buf[count] = h_rho[iatom];
+
+  return count;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  // add the n received density contributions onto the listed atoms
+
+  for (int k = 0; k < n; k++)
+    h_rho[list[k]] += buf[k];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSInitialize, const int &i) const {
+  // reset the density of atom i before kernel A accumulates into it
+  d_rho(i) = 0.0;
+}
+
+/* ----------------------------------------------------------------------
+   kernel A: accumulate the density rho from all neighbors within cutoff
+   ii = index into d_ilist of the atom handled by this work item
+   NEIGHFLAG selects the memory traits of the rho view (via AtomicF)
+   NEWTON_PAIR = 1: neighbor j is always updated as well,
+   NEWTON_PAIR = 0: neighbor j is updated only if j < nlocal
+------------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelA<NEIGHFLAG,NEWTON_PAIR>, const int &ii) const {
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  // The rho array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*, typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > rho = v_rho;
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT rhotmp = 0.0;  // local sum for atom i, written to rho once after the loop
+
+  for (int jj = 0; jj < jnum; jj++) {
+    //int j = d_neighbors_i[jj];
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;  // keep only the bits selected by NEIGHMASK
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+    if (rsq < cutforcesq) {
+      F_FLOAT p = sqrt(rsq)*rdr + 1.0;  // position on the r grid (grid index starts at 1)
+      int m = static_cast<int> (p);
+      m = MIN(m,nr-1);  // clamp to the last tabulated interval
+      p -= m;
+      p = MIN(p,1.0);  // fractional position within the interval, at most 1
+      const int d_type2rhor_ji = d_type2rhor(jtype,itype);  // table for the density contributed by j at i
+      rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
+                  d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
+      if (NEWTON_PAIR || j < nlocal) {
+        const int d_type2rhor_ij = d_type2rhor(itype,jtype);  // table for the density contributed by i at j
+        rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
+                    d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
+      }
+    }
+
+  }
+  rho[i] += rhotmp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel B: embedding-energy derivative (and embedding energy when EFLAG is set) for each atom in the list
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // Embedding pass for the ii-th entry of d_ilist.
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // if rho > rhomax (e.g. due to close approach of two atoms),
+  //   will exceed table, so add linear term to conserve energy
+
+  const int i = d_ilist[ii];
+  const int frho_idx = d_type2frho[type(i)];
+  const F_FLOAT rho_i = d_rho[i];
+
+  // table bin (clamped to [1,nrho-1]) and offset within the bin (capped at 1.0)
+  F_FLOAT frac = rho_i*rdrho + 1.0;
+  const int bin = MAX(1,MIN(static_cast<int> (frac),nrho-1));
+  frac -= bin;
+  frac = MIN(frac,1.0);
+
+  // spline coefficients 0-2 give the derivative, 3-6 the value
+  const F_FLOAT fp_i = (d_frho_spline(frho_idx,bin,0)*frac + d_frho_spline(frho_idx,bin,1))*frac + d_frho_spline(frho_idx,bin,2);
+  d_fp[i] = fp_i;
+
+  if (EFLAG) {
+    F_FLOAT phi = ((d_frho_spline(frho_idx,bin,3)*frac + d_frho_spline(frho_idx,bin,4))*frac +
+                    d_frho_spline(frho_idx,bin,5))*frac + d_frho_spline(frho_idx,bin,6);
+    if (rho_i > rhomax) phi += fp_i * (rho_i-rhomax);
+    if (eflag_global) ev.evdwl += phi;
+    if (eflag_atom) d_eatom[i] += phi;
+  }
+
+}
+
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelB<EFLAG>, const int &ii) const {
+  // Two-argument entry point: forwards to the three-argument overload with a
+  // local accumulator that is not read after the call.
+  EV_FLOAT scratch;
+  this->template operator()<EFLAG>(TagPairEAMFSKernelB<EFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel AB: density accumulation and embedding evaluation for atom i fused into a single pass
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelAB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // Fused density + embedding pass for the ii-th entry of d_ilist.
+  // Only entries belonging to atom i (d_rho[i], d_fp[i], d_eatom[i]) are
+  // written here; neighbors are read but never updated.
+
+  const int i = d_ilist[ii];
+  const int itype = type(i);
+  const X_FLOAT xi = x(i,0);
+  const X_FLOAT yi = x(i,1);
+  const X_FLOAT zi = x(i,2);
+  const int num_neigh = d_numneigh[i];
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  F_FLOAT rho_sum = 0.0;
+
+  for (int jj = 0; jj < num_neigh; jj++) {
+    const int j = d_neighbors(i,jj) & NEIGHMASK;
+    const int jtype = type(j);
+    const X_FLOAT dx = xi - x(j,0);
+    const X_FLOAT dy = yi - x(j,1);
+    const X_FLOAT dz = zi - x(j,2);
+    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
+
+    // skip pairs that are not inside the cutoff
+    if (!(rsq < cutforcesq)) continue;
+
+    // table bin (capped at nr-1) and offset within the bin (capped at 1.0)
+    F_FLOAT frac = sqrt(rsq)*rdr + 1.0;
+    const int bin = MIN(static_cast<int> (frac),nr-1);
+    frac -= bin;
+    frac = MIN(frac,1.0);
+
+    // density at atom i due to atom j
+    const int ji = d_type2rhor(jtype,itype);
+    rho_sum += ((d_rhor_spline(ji,bin,3)*frac + d_rhor_spline(ji,bin,4))*frac +
+                 d_rhor_spline(ji,bin,5))*frac + d_rhor_spline(ji,bin,6);
+  }
+
+  d_rho[i] += rho_sum;
+
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // if rho > rhomax (e.g. due to close approach of two atoms),
+  //   will exceed table, so add linear term to conserve energy
+
+  const F_FLOAT rho_i = d_rho[i];
+  const int frho_idx = d_type2frho[itype];
+
+  // table bin (clamped to [1,nrho-1]) and offset within the bin (capped at 1.0)
+  F_FLOAT frac = rho_i*rdrho + 1.0;
+  const int bin = MAX(1,MIN(static_cast<int> (frac),nrho-1));
+  frac -= bin;
+  frac = MIN(frac,1.0);
+
+  const F_FLOAT fp_i = (d_frho_spline(frho_idx,bin,0)*frac + d_frho_spline(frho_idx,bin,1))*frac + d_frho_spline(frho_idx,bin,2);
+  d_fp[i] = fp_i;
+
+  if (EFLAG) {
+    F_FLOAT phi = ((d_frho_spline(frho_idx,bin,3)*frac + d_frho_spline(frho_idx,bin,4))*frac +
+                    d_frho_spline(frho_idx,bin,5))*frac + d_frho_spline(frho_idx,bin,6);
+    if (rho_i > rhomax) phi += fp_i * (rho_i-rhomax);
+    if (eflag_global) ev.evdwl += phi;
+    if (eflag_atom) d_eatom[i] += phi;
+  }
+
+}
+
+template<class DeviceType>
+template<int EFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelAB<EFLAG>, const int &ii) const {
+  // Two-argument entry point: forwards to the three-argument overload with a
+  // local accumulator that is not read after the call.
+  EV_FLOAT scratch;
+  this->template operator()<EFLAG>(TagPairEAMFSKernelAB<EFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // Force pass for the ii-th entry of d_ilist: accumulates the force on
+  // atom i, applies the opposite force to neighbor j for half-style lists,
+  // and tallies energy/virial into ev when EVFLAG is set.
+
+  // The f array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  const int i = d_ilist[ii];
+  const int itype = type(i);
+  const X_FLOAT xi = x(i,0);
+  const X_FLOAT yi = x(i,1);
+  const X_FLOAT zi = x(i,2);
+  const int num_neigh = d_numneigh[i];
+
+  // force on atom i is summed locally and stored once at the end
+  F_FLOAT fx_i = 0.0;
+  F_FLOAT fy_i = 0.0;
+  F_FLOAT fz_i = 0.0;
+
+  for (int jj = 0; jj < num_neigh; jj++) {
+    const int j = d_neighbors(i,jj) & NEIGHMASK;
+    const int jtype = type(j);
+    const X_FLOAT dx = xi - x(j,0);
+    const X_FLOAT dy = yi - x(j,1);
+    const X_FLOAT dz = zi - x(j,2);
+    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
+
+    // skip pairs that are not inside the cutoff
+    if (!(rsq < cutforcesq)) continue;
+
+    // table bin (capped at nr-1) and offset within the bin (capped at 1.0)
+    const F_FLOAT r = sqrt(rsq);
+    F_FLOAT frac = r*rdr + 1.0;
+    const int bin = MIN(static_cast<int> (frac),nr-1);
+    frac -= bin;
+    frac = MIN(frac,1.0);
+
+    // rhoip = derivative of (density at atom j due to atom i)
+    // rhojp = derivative of (density at atom i due to atom j)
+    // phi = pair potential energy
+    // phip = phi'
+    // z2 = phi * r
+    // z2p = (phi * r)' = (phi' r) + phi
+    // psip needs both fp[i] and fp[j] terms since r_ij appears in two
+    //   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+    //   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+    const int ij = d_type2rhor(itype,jtype);
+    const F_FLOAT rhoip = (d_rhor_spline(ij,bin,0)*frac + d_rhor_spline(ij,bin,1))*frac +
+                           d_rhor_spline(ij,bin,2);
+    const int ji = d_type2rhor(jtype,itype);
+    const F_FLOAT rhojp = (d_rhor_spline(ji,bin,0)*frac + d_rhor_spline(ji,bin,1))*frac +
+                           d_rhor_spline(ji,bin,2);
+    const int zij = d_type2z2r(itype,jtype);
+    const F_FLOAT z2p = (d_z2r_spline(zij,bin,0)*frac + d_z2r_spline(zij,bin,1))*frac +
+                         d_z2r_spline(zij,bin,2);
+    const F_FLOAT z2 = ((d_z2r_spline(zij,bin,3)*frac + d_z2r_spline(zij,bin,4))*frac +
+                         d_z2r_spline(zij,bin,5))*frac + d_z2r_spline(zij,bin,6);
+
+    const F_FLOAT rinv = 1.0/r;
+    const F_FLOAT phi = z2*rinv;
+    const F_FLOAT phip = z2p*rinv - phi*rinv;
+    const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip;
+    const F_FLOAT fpair = -psip*rinv;
+
+    fx_i += dx*fpair;
+    fy_i += dy*fpair;
+    fz_i += dz*fpair;
+
+    // true when this pair is also applied to atom j (half-style lists only)
+    const bool pair_on_j = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal);
+
+    if (pair_on_j) {
+      a_f(j,0) -= dx*fpair;
+      a_f(j,1) -= dy*fpair;
+      a_f(j,2) -= dz*fpair;
+    }
+
+    if (EVFLAG) {
+      // full weight when the pair covers both atoms, half weight otherwise
+      if (eflag) ev.evdwl += (pair_on_j?1.0:0.5)*phi;
+
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,phi,fpair,dx,dy,dz);
+    }
+  }
+
+  a_f(i,0) += fx_i;
+  a_f(i,1) += fy_i;
+  a_f(i,2) += fz_i;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
+  // Two-argument entry point: forwards to the three-argument overload with a
+  // local accumulator that is not read after the call.
+  EV_FLOAT scratch;
+  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairEAMFSKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  // Tally the energy and virial of one i-j pair.
+  //   ev               accumulator receiving the global virial terms
+  //   i, j             atom indices of the pair
+  //   epair            pair energy; half of it goes to each tallied atom
+  //   fpair            pair force factor multiplying the separation vector
+  //   delx,dely,delz   components of the i-j separation
+  // Per-atom energy is tallied when eflag and eflag_atom are set; virial
+  // terms are tallied when vflag_either is set.
+
+  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = k_vatom.view<DeviceType>();
+
+  // which atoms of the pair receive a half share:
+  //   FULL list:  atom i always, atom j never
+  //   otherwise:  each atom when NEWTON_PAIR is set or the atom is local
+  const bool tally_i = (NEIGHFLAG==FULL) || NEWTON_PAIR || i < nlocal;
+  const bool tally_j = (NEIGHFLAG!=FULL) && (NEWTON_PAIR || j < nlocal);
+
+  if (eflag && eflag_atom) {
+    const E_FLOAT epairhalf = 0.5 * epair;
+    if (tally_i) a_eatom[i] += epairhalf;
+    if (tally_j) a_eatom[j] += epairhalf;
+  }
+
+  if (vflag_either) {
+    // virial components in the order xx, yy, zz, xy, xz, yz
+    const E_FLOAT v0 = delx*delx*fpair;
+    const E_FLOAT v1 = dely*dely*fpair;
+    const E_FLOAT v2 = delz*delz*fpair;
+    const E_FLOAT v3 = delx*dely*fpair;
+    const E_FLOAT v4 = delx*delz*fpair;
+    const E_FLOAT v5 = dely*delz*fpair;
+
+    if (vflag_global) {
+      if (tally_i) {
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+      }
+      if (tally_j) {
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+      }
+    }
+
+    if (vflag_atom) {
+      if (tally_i) {
+        a_vatom(i,0) += 0.5*v0;
+        a_vatom(i,1) += 0.5*v1;
+        a_vatom(i,2) += 0.5*v2;
+        a_vatom(i,3) += 0.5*v3;
+        a_vatom(i,4) += 0.5*v4;
+        a_vatom(i,5) += 0.5*v5;
+      }
+      if (tally_j) {
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Duplicate PairEAMFS functions
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   read EAM Finnis-Sinclair file
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  // expected arguments: "* *", the potential file name,
+  // then one element name (or NULL) per atom type
+
+  const int ntypes = atom->ntypes;
+  if (narg != 3 + ntypes)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // ensure I,J args are * *
+
+  const bool wildcard_ij = (strcmp(arg[0],"*") == 0) && (strcmp(arg[1],"*") == 0);
+  if (!wildcard_ij)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // discard any previously read potential, then
+  // read EAM Finnis-Sinclair file
+
+  if (fs) {
+    for (int e = 0; e < fs->nelements; e++) delete [] fs->elements[e];
+    delete [] fs->elements;
+    delete [] fs->mass;
+    memory->destroy(fs->frho);
+    memory->destroy(fs->rhor);
+    memory->destroy(fs->z2r);
+    delete fs;
+  }
+  fs = new Fs();
+  read_file(arg[2]);
+
+  // read args that map atom types to elements in potential file
+  // map[i] = which element the Ith atom type is, -1 if NULL
+
+  for (int iarg = 3; iarg < narg; iarg++) {
+    const int itype = iarg - 2;
+    if (strcmp(arg[iarg],"NULL") == 0) {
+      map[itype] = -1;
+      continue;
+    }
+
+    // first element whose name matches this argument
+    int elem = 0;
+    while (elem < fs->nelements && strcmp(arg[iarg],fs->elements[elem]) != 0) elem++;
+
+    if (elem < fs->nelements) map[itype] = elem;
+    else error->all(FLERR,"No matching element in EAM potential file");
+  }
+
+  // clear setflag since coeff() called once with I,J = * *
+
+  for (int i = 1; i <= ntypes; i++)
+    for (int j = i; j <= ntypes; j++)
+      setflag[i][j] = 0;
+
+  // set setflag i,j for type pairs where both are mapped to elements
+  // set mass of atom type if i = j
+
+  int count = 0;
+  for (int i = 1; i <= ntypes; i++) {
+    for (int j = i; j <= ntypes; j++) {
+      if (map[i] < 0 || map[j] < 0) continue;
+      setflag[i][j] = 1;
+      if (i == j) atom->set_mass(i,fs->mass[map[i]]);
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   read a multi-element DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::read_file(char *filename)
+{
+  // Reads the potential file named by filename into the Fs structure that
+  // fs points to.  Only rank 0 (comm->me == 0) touches the file; every value
+  // it reads is then broadcast with MPI_Bcast so all ranks end up with the
+  // same contents.  Tabulated values are stored starting at index 1.
+
+  Fs *file = fs;
+
+  // open potential file
+
+  int me = comm->me;
+  FILE *fptr = NULL;
+  char line[MAXLINE];
+
+  if (me == 0) {
+    fptr = force->open_potential(filename);
+    if (fptr == NULL) {
+      char str[128];
+      // bounded formatting: a long file name must not overrun str
+      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);
+      error->one(FLERR,str);
+    }
+  }
+
+  // read and broadcast header
+  // the first three lines are read and discarded, the fourth is kept
+  // extract element names from nelements line
+
+  int n;
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    n = strlen(line) + 1;
+  }
+  MPI_Bcast(&n,1,MPI_INT,0,world);
+  MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+  // line must hold the element count followed by exactly that many names
+
+  sscanf(line,"%d",&file->nelements);
+  int nwords = atom->count_words(line);
+  if (nwords != file->nelements + 1)
+    error->all(FLERR,"Incorrect element names in EAM potential file");
+
+  // tokenize the line: the first token (the count) is skipped, the names
+  // are collected, and the terminating NULL takes the extra slot in words
+
+  char **words = new char*[file->nelements+1];
+  nwords = 0;
+  strtok(line," \t\n\r\f");
+  while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
+
+  // copy the names, since words[] points into the line buffer
+
+  file->elements = new char*[file->nelements];
+  for (int i = 0; i < file->nelements; i++) {
+    n = strlen(words[i]) + 1;
+    file->elements[i] = new char[n];
+    strcpy(file->elements[i],words[i]);
+  }
+  delete [] words;
+
+  // table sizes, spacings and cutoff: nrho drho nr dr cut
+
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    sscanf(line,"%d %lg %d %lg %lg",
+           &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
+  }
+
+  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
+  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
+  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
+
+  // allocate tables with one extra entry, since values start at index 1
+
+  file->mass = new double[file->nelements];
+  memory->create(file->frho,file->nelements,file->nrho+1,
+                                              "pair:frho");
+  memory->create(file->rhor,file->nelements,file->nelements,
+                 file->nr+1,"pair:rhor");
+  memory->create(file->z2r,file->nelements,file->nelements,
+                 file->nr+1,"pair:z2r");
+
+  // per element: a line with an integer (unused) and the mass,
+  // then nrho frho values, then nr rhor values for each element j
+
+  int i,j,tmp;
+  for (i = 0; i < file->nelements; i++) {
+    if (me == 0) {
+      fgets(line,MAXLINE,fptr);
+      sscanf(line,"%d %lg",&tmp,&file->mass[i]);
+    }
+    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
+
+    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
+    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
+
+    for (j = 0; j < file->nelements; j++) {
+      if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
+      MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+  }
+
+  // z2r tables are read only for element pairs with j <= i
+
+  for (i = 0; i < file->nelements; i++)
+    for (j = 0; j <= i; j++) {
+      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
+      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+
+  // close the potential file
+
+  if (me == 0) fclose(fptr);
+}
+
+/* ----------------------------------------------------------------------
+   copy read-in setfl potential to standard array format
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairEAMFSKokkos<DeviceType>::file2array_fs()
+{
+  const int ntypes = atom->ntypes;
+  const int nelem = fs->nelements;
+
+  // set function params directly from fs file
+
+  nrho = fs->nrho;
+  nr = fs->nr;
+  drho = fs->drho;
+  dr = fs->dr;
+  rhomax = (nrho-1) * drho;
+
+  // ------------------------------------------------------------------
+  // setup frho arrays
+  // ------------------------------------------------------------------
+
+  // allocate frho arrays
+  // nfrho = # of fs elements + 1 for zero array
+
+  nfrho = nelem + 1;
+  memory->destroy(frho);
+  memory->create(frho,nfrho,nrho+1,"pair:frho");
+
+  // copy each element's frho to global frho
+
+  for (int e = 0; e < nelem; e++)
+    for (int k = 1; k <= nrho; k++) frho[e][k] = fs->frho[e][k];
+
+  // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
+  // this is necessary b/c fp is still computed for non-EAM atoms
+
+  for (int k = 1; k <= nrho; k++) frho[nfrho-1][k] = 0.0;
+
+  // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
+  // if atom type doesn't point to element (non-EAM atom in pair hybrid)
+  // then map it to last frho array of zeroes
+
+  for (int t = 1; t <= ntypes; t++)
+    type2frho[t] = (map[t] >= 0) ? map[t] : nfrho-1;
+
+  // ------------------------------------------------------------------
+  // setup rhor arrays
+  // ------------------------------------------------------------------
+
+  // allocate rhor arrays
+  // nrhor = square of # of fs elements
+
+  nrhor = nelem * nelem;
+  memory->destroy(rhor);
+  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+
+  // copy each element pair rhor to global rhor
+
+  int slot = 0;
+  for (int e1 = 0; e1 < nelem; e1++) {
+    for (int e2 = 0; e2 < nelem; e2++) {
+      for (int k = 1; k <= nr; k++) rhor[slot][k] = fs->rhor[e1][e2][k];
+      slot++;
+    }
+  }
+
+  // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
+  // for fs files, there is a full NxN set of rhor arrays
+  // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
+
+  for (int t1 = 1; t1 <= ntypes; t1++)
+    for (int t2 = 1; t2 <= ntypes; t2++)
+      type2rhor[t1][t2] = map[t1] * nelem + map[t2];
+
+  // ------------------------------------------------------------------
+  // setup z2r arrays
+  // ------------------------------------------------------------------
+
+  // allocate z2r arrays
+  // nz2r = N*(N+1)/2 where N = # of fs elements
+
+  nz2r = nelem * (nelem+1) / 2;
+  memory->destroy(z2r);
+  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+
+  // copy each element pair z2r to global z2r, only for I >= J
+
+  slot = 0;
+  for (int e1 = 0; e1 < nelem; e1++) {
+    for (int e2 = 0; e2 <= e1; e2++) {
+      for (int k = 1; k <= nr; k++) z2r[slot][k] = fs->z2r[e1][e2][k];
+      slot++;
+    }
+  }
+
+  // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
+  // set of z2r arrays only fill lower triangular Nelement matrix
+  // value = n = sum over rows of lower-triangular matrix until reach irow,icol
+  // swap indices when irow < icol to stay lower triangular
+  // if map = -1 (non-EAM atom in pair hybrid):
+  //   type2z2r is not used by non-opt
+  //   but set type2z2r to 0 since accessed by opt
+
+  for (int t1 = 1; t1 <= ntypes; t1++) {
+    for (int t2 = 1; t2 <= ntypes; t2++) {
+      int irow = map[t1];
+      int icol = map[t2];
+      if (irow == -1 || icol == -1) {
+        type2z2r[t1][t2] = 0;
+        continue;
+      }
+      if (irow < icol) {
+        const int swapped = irow;
+        irow = icol;
+        icol = swapped;
+      }
+
+      // entries in the rows above irow, plus the column within irow
+      int offset = 0;
+      for (int row = 0; row < irow; row++) offset += row + 1;
+      offset += icol;
+      type2z2r[t1][t2] = offset;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Explicit instantiations of the pair style.  The LMPDeviceType version is
+// always compiled; the LMPHostType version is compiled only when
+// KOKKOS_HAVE_CUDA is defined (presumably because the two types coincide
+// otherwise -- confirm against the Kokkos type definitions).
+template class PairEAMFSKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class PairEAMFSKokkos<LMPHostType>;
+#endif
\ No newline at end of file
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h
new file mode 100755
index 0000000000..9ffa121467
--- /dev/null
+++ b/src/KOKKOS/pair_eam_fs_kokkos.h
@@ -0,0 +1,183 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+// Style names provided by this header: eam/fs/kk and eam/fs/kk/device map to
+// the LMPDeviceType instantiation, eam/fs/kk/host to the LMPHostType one.
+PairStyle(eam/fs/kk,PairEAMFSKokkos<LMPDeviceType>)
+PairStyle(eam/fs/kk/device,PairEAMFSKokkos<LMPDeviceType>)
+PairStyle(eam/fs/kk/host,PairEAMFSKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_EAM_FS_KOKKOS_H
+#define LMP_PAIR_EAM_FS_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_eam.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+// Empty tag types passed as the first argument of
+// PairEAMFSKokkos::operator() to select which overload is invoked.
+// The templated tags carry their integer parameters (neighbor-list flag,
+// newton-pair flag, energy/virial flags) as compile-time values.
+struct TagPairEAMFSPackForwardComm{};
+struct TagPairEAMFSUnpackForwardComm{};
+struct TagPairEAMFSInitialize{};
+
+// density accumulation that may also update the neighbor atom
+template<int NEIGHFLAG, int NEWTON_PAIR>
+struct TagPairEAMFSKernelA{};
+
+// embedding-energy derivative (and energy when EFLAG is nonzero)
+template<int EFLAG>
+struct TagPairEAMFSKernelB{};
+
+// density accumulation and embedding evaluation combined
+template<int EFLAG>
+struct TagPairEAMFSKernelAB{};
+
+// force computation, with energy/virial tally when EVFLAG is nonzero
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+struct TagPairEAMFSKernelC{};
+
+// Cannot use virtual inheritance on the GPU
+
+// Kokkos pair style for EAM Finnis-Sinclair potentials, templated on the
+// Kokkos device type.  It derives from PairEAM and acts as its own functor:
+// each tagged operator() overload below is one kernel.
+template<class DeviceType>
+class PairEAMFSKokkos : public PairEAM {
+ public:
+  // neighbor-list styles this pair style declares as enabled
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // type of the accumulator taken by the three-argument operator() overloads
+  typedef EV_FLOAT value_type;
+
+  PairEAMFSKokkos(class LAMMPS *);
+  virtual ~PairEAMFSKokkos();
+  virtual void compute(int, int);
+  void init_style();
+  // parses the pair coefficients and reads the potential file
+  void coeff(int, char **);
+
+  // kernels for the forward-communication buffer
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSPackForwardComm, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSUnpackForwardComm, const int&) const;
+
+  // sets d_rho of one atom to zero
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSInitialize, const int&) const;
+  
+  // kernel A: density accumulation over the neighbors of one atom
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelA<NEIGHFLAG,NEWTON_PAIR>, const int&) const;
+
+  // kernel B: embedding-energy derivative, plus energy when EFLAG is set;
+  // the overload without EV_FLOAT& forwards to the one with it
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelB<EFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelB<EFLAG>, const int&) const;
+
+  // kernel AB: kernels A and B combined, writing only the atom's own entries
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelAB<EFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int EFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelAB<EFLAG>, const int&) const;
+
+  // kernel C: forces, plus energy/virial tally when EVFLAG is set
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
+
+  // tallies per-atom energy and global/per-atom virial for one i-j pair
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+  // communication hooks, in Kokkos-view and plain-buffer variants
+  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, 
+                               int, int *);
+  virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
+  virtual int pack_forward_comm(int, int *, double *, int, int *);
+  virtual void unpack_forward_comm(int, int, double *);
+  int pack_reverse_comm(int, int, double *);
+  void unpack_reverse_comm(int, int *, double *);
+
+ protected:
+  void cleanup_copy();
+
+  // per-atom views read by the kernels: positions, forces, types, tags
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_tagint_1d tag;
+
+  // per-atom energy and virial storage (dual views and their device views)
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+
+  // per-atom density (rho) and embedding derivative (fp);
+  // v_rho is the view kernel A wraps with its atomic memory traits
+  DAT::tdual_ffloat_1d k_rho;
+  DAT::tdual_ffloat_1d k_fp;
+  DAT::t_ffloat_1d d_rho;
+  typename AT::t_ffloat_1d v_rho;
+  DAT::t_ffloat_1d d_fp;
+  HAT::t_ffloat_1d h_rho;
+  HAT::t_ffloat_1d h_fp;
+
+  // maps from atom type (or type pair) to the spline table to use
+  DAT::t_int_1d_randomread d_type2frho;
+  DAT::t_int_2d_randomread d_type2rhor;
+  DAT::t_int_2d_randomread d_type2z2r;
+
+  // spline tables: 7 coefficients per entry; the kernels use
+  // coefficients 0-2 for the derivative and 3-6 for the value
+  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
+  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+
+  t_ffloat_2d_n7_randomread d_frho_spline;
+  t_ffloat_2d_n7_randomread d_rhor_spline;
+  t_ffloat_2d_n7_randomread d_z2r_spline;
+
+  // potential setup: file2array_fs() fills the frho/rhor/z2r arrays and
+  // the type2* maps from the data read_file() loaded
+  virtual void file2array();
+  void file2array_fs();
+  void array2spline();
+  void interpolate(int, double, double *, t_host_ffloat_2d_n7, int);
+  void read_file(char *);
+
+  // neighbor-list views read by the kernels
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+  //NeighListKokkos<DeviceType> k_list;
+
+  // presumably state shared with the forward-comm pack/unpack kernels
+  // -- TODO confirm against the .cpp
+  int iswap;
+  int first;
+  typename AT::t_int_2d d_sendlist;
+  typename AT::t_xfloat_1d_um v_buf;
+
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  friend void pair_virial_fdotr_compute<PairEAMFSKokkos>(PairEAMFSKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Cannot use chosen neighbor list style with pair eam/kk/fs
+
+That style is not supported by Kokkos.
+
+*/
diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp
new file mode 100755
index 0000000000..2e72bdaa0f
--- /dev/null
+++ b/src/KOKKOS/pair_sw_kokkos.cpp
@@ -0,0 +1,908 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_sw_kokkos.h"
+#include "kokkos.h"
+#include "pair_kokkos.h"
+#include "atom_kokkos.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "force.h"
+#include "comm.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+#define DELTA 4
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairSWKokkos<DeviceType>::PairSWKokkos(LAMMPS *lmp) : PairSW(lmp)
+{
+  // clear respa_enable and cache the constant 1/3 used by the three-body tallies
+  respa_enable = 0;
+  THIRD = 1.0/3.0;
+
+  // Kokkos bookkeeping: execution space plus the atom-field masks used by compute()
+  atomKK = static_cast<AtomKokkos *>(atom);
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TAG_MASK | TYPE_MASK | datamask_modify;
+}
+
+/* ----------------------------------------------------------------------
+   check if allocated, since class can be destructed when incomplete
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairSWKokkos<DeviceType>::~PairSWKokkos()
+{
+  if (copymode) return;  // compute() sets copymode around its kernel launches; no cleanup then
+
+  memory->destroy_kokkos(k_eatom,eatom);
+  eatom = NULL;
+  memory->destroy_kokkos(k_vatom,vatom);
+  vatom = NULL;
+}
+
+/* launch the SW kernels for the active neighflag and gather the requested energy/virial tallies */
+
+template<class DeviceType>
+void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // per-atom arrays are destroyed and re-created on every call that requests them
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+  // sync the atom fields named in datamask_read, then flag the fields this call will write
+  atomKK->sync(execution_space,datamask_read);
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+  // views of the atom data for DeviceType, plus the atom counts
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  newton_pair = force->newton_pair;
+  nall = atom->nlocal + atom->nghost;
+  // neighbor list; ignum = inum + list->gnum is the index range of the FullB pass
+  const int inum = list->inum;
+  const int ignum = inum + list->gnum;
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_ilist = k_list->d_ilist;
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+
+  k_list->clean_copy();
+  copymode = 1;  // ~PairSWKokkos() skips its cleanup while this is set
+
+  EV_FLOAT ev;
+  EV_FLOAT ev_all;
+
+  // loop over neighbor list of my atoms
+  // evflag picks parallel_reduce (tallies land in ev) over parallel_for; each launch is fenced
+  if (neighflag == HALF) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == HALFTHREAD) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == FULL) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+    // FullB pass: covers ignum list entries instead of inum
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,1> >(0,ignum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,0> >(0,ignum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  }
+  // fold the reduced tallies into the global accumulators
+  if (eflag_global) eng_vdwl += ev_all.evdwl;
+  if (vflag_global) {
+    virial[0] += ev_all.v[0];
+    virial[1] += ev_all.v[1];
+    virial[2] += ev_all.v[2];
+    virial[3] += ev_all.v[3];
+    virial[4] += ev_all.v[4];
+    virial[5] += ev_all.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;  // kernels are done; normal destructor behavior again
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+  // Kernel for list entry ii: two- and three-body forces are applied to i and scattered to j and k
+  // a_f re-wraps f with the memory traits selected by AtomicF<NEIGHFLAG> for those scattered updates
+
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
+  F_FLOAT evdwl = 0.0;
+  F_FLOAT fpair = 0.0;
+
+  const int i = d_ilist[ii];
+  const tagint itag = tag[i];
+  const int itype = d_map[type[i]];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+
+  // two-body interactions, skip half of them
+
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;  // keep only the bits selected by NEIGHMASK
+    const tagint jtag = tag[j];
+    // skip rule: parity of the tag sum decides; for equal tags the z, then y, then x coordinates decide
+    if (itag > jtag) {
+      if ((itag+jtag) % 2 == 0) continue;
+    } else if (itag < jtag) {
+      if ((itag+jtag) % 2 == 1) continue;
+    } else {
+      if (x(j,2) < ztmp) continue;
+      if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
+      if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
+    }
+
+    const int jtype = d_map[type[j]];
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+    const int ijparam = d_elem2param(itype,jtype,jtype);
+    if (rsq > d_params[ijparam].cutsq) continue;
+
+    twobody(d_params[ijparam],rsq,fpair,eflag,evdwl);
+
+    fxtmpi += delx*fpair;
+    fytmpi += dely*fpair;
+    fztmpi += delz*fpair;
+    a_f(j,0) -= delx*fpair;
+    a_f(j,1) -= dely*fpair;
+    a_f(j,2) -= delz*fpair;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += evdwl;
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,evdwl,fpair,delx,dely,delz);
+    }
+  }
+  // three-body interactions centered on i, over neighbor pairs with jj < kk
+  const int jnumm1 = jnum - 1;
+
+  for (int jj = 0; jj < jnumm1; jj++) {
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const int jtype = d_map[type[j]];
+    const int ijparam = d_elem2param(itype,jtype,jtype);
+    delr1[0] = x(j,0) - xtmp;
+    delr1[1] = x(j,1) - ytmp;
+    delr1[2] = x(j,2) - ztmp;
+    const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+    if (rsq1 > d_params[ijparam].cutsq) continue;
+
+    F_FLOAT fxtmpj = 0.0;
+    F_FLOAT fytmpj = 0.0;
+    F_FLOAT fztmpj = 0.0;
+
+    for (int kk = jj+1; kk < jnum; kk++) {
+      int k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      const int ktype = d_map[type[k]];
+      const int ikparam = d_elem2param(itype,ktype,ktype);
+      const int ijkparam = d_elem2param(itype,jtype,ktype);
+
+      delr2[0] = x(k,0) - xtmp;
+      delr2[1] = x(k,1) - ytmp;
+      delr2[2] = x(k,2) - ztmp;
+      const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+
+      if (rsq2 > d_params[ikparam].cutsq) continue;
+
+      threebody(d_params[ijparam],d_params[ikparam],d_params[ijkparam],
+                rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
+
+      fxtmpi -= fj[0] + fk[0];
+      fytmpi -= fj[1] + fk[1];
+      fztmpi -= fj[2] + fk[2];
+      fxtmpj += fj[0];
+      fytmpj += fj[1];
+      fztmpj += fj[2];
+      a_f(k,0) += fk[0];
+      a_f(k,1) += fk[1];
+      a_f(k,2) += fk[2];
+
+      if (EVFLAG) {
+        if (eflag) ev.evdwl += evdwl;
+        if (vflag_either || eflag_atom) this->template ev_tally3<NEIGHFLAG>(ev,i,j,k,evdwl,0.0,fj,fk,delr1,delr2);
+      }
+    }
+    // the force on j was summed locally over the kk loop; write it once
+    a_f(j,0) += fxtmpj;
+    a_f(j,1) += fytmpj;
+    a_f(j,2) += fztmpj;
+  }
+  // the force on i was summed locally over both loops; write it once
+  a_f(i,0) += fxtmpi;
+  a_f(i,1) += fytmpi;
+  a_f(i,2) += fztmpi;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG> dispatch, const int &ii) const {
+  EV_FLOAT discarded;  // the tallying overload needs an accumulator; its contents are dropped here
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch, ii, discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+  // First full-list pass for list entry ii: only f(i,*) is written, neighbors are never updated
+  F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
+  F_FLOAT evdwl = 0.0;
+  F_FLOAT fpair = 0.0;
+
+  const int i = d_ilist[ii];
+
+  // atom tags are not needed in this pass: no neighbor is skipped by tag
+  const int itype = d_map[type[i]];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+
+  // two-body interactions
+
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    // every neighbor within the cutoff is processed; the energy is tallied with weight 0.5 below
+
+    const int jtype = d_map[type[j]];
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+    const int ijparam = d_elem2param(itype,jtype,jtype);
+
+    if (rsq > d_params[ijparam].cutsq) continue;
+
+    twobody(d_params[ijparam],rsq,fpair,eflag,evdwl);
+
+    fxtmpi += delx*fpair;
+    fytmpi += dely*fpair;
+    fztmpi += delz*fpair;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += 0.5*evdwl;
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,evdwl,fpair,delx,dely,delz);
+    }
+  }
+  // three-body interactions centered on i, over neighbor pairs with jj < kk
+  const int jnumm1 = jnum - 1;
+
+  for (int jj = 0; jj < jnumm1; jj++) {
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const int jtype = d_map[type[j]];
+    const int ijparam = d_elem2param(itype,jtype,jtype);
+    delr1[0] = x(j,0) - xtmp;
+    delr1[1] = x(j,1) - ytmp;
+    delr1[2] = x(j,2) - ztmp;
+    const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+
+    if (rsq1 > d_params[ijparam].cutsq) continue;
+
+    for (int kk = jj+1; kk < jnum; kk++) {
+      int k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      const int ktype = d_map[type[k]];
+      const int ikparam = d_elem2param(itype,ktype,ktype);
+      const int ijkparam = d_elem2param(itype,jtype,ktype);
+
+      delr2[0] = x(k,0) - xtmp;
+      delr2[1] = x(k,1) - ytmp;
+      delr2[2] = x(k,2) - ztmp;
+      const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+
+      if (rsq2 > d_params[ikparam].cutsq) continue;
+
+      threebody(d_params[ijparam],d_params[ikparam],d_params[ijkparam],
+                rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
+
+      // only the reaction on the central atom is kept here; fj and fk are not scattered
+      fxtmpi -= fj[0] + fk[0];
+      fytmpi -= fj[1] + fk[1];
+      fztmpi -= fj[2] + fk[2];
+      if (EVFLAG) {
+        if (eflag) ev.evdwl += evdwl;
+        if (vflag_either || eflag_atom) this->template ev_tally3<NEIGHFLAG>(ev,i,j,k,evdwl,0.0,fj,fk,delr1,delr2);
+      }
+    }
+  }
+
+  f(i,0) += fxtmpi;
+  f(i,1) += fytmpi;
+  f(i,2) += fztmpi;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG> dispatch, const int &ii) const {
+  EV_FLOAT discarded;  // the tallying overload needs an accumulator; its contents are dropped here
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch, ii, discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+  // Second full-list pass for list entry ii: atom i collects fj from triplets centered on its neighbors j
+  F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
+  F_FLOAT evdwl = 0.0;
+  // evdwl keeps the 0.0 above unless threebody() is called below with eflag set
+
+  const int i = d_ilist[ii];
+
+  const int itype = d_map[type[i]];
+  const X_FLOAT xtmpi = x(i,0);
+  const X_FLOAT ytmpi = x(i,1);
+  const X_FLOAT ztmpi = x(i,2);
+
+  const int jnum = d_numneigh[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    if (j >= nlocal) continue;  // centers are restricted to indices below nlocal
+    const int jtype = d_map[type[j]];
+    const int jiparam = d_elem2param(jtype,itype,itype);
+    const X_FLOAT xtmpj = x(j,0);
+    const X_FLOAT ytmpj = x(j,1);
+    const X_FLOAT ztmpj = x(j,2);
+
+    delr1[0] = xtmpi - xtmpj;
+    delr1[1] = ytmpi - ytmpj;
+    delr1[2] = ztmpi - ztmpj;
+    const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+
+    if (rsq1 > d_params[jiparam].cutsq) continue;
+
+    const int j_jnum = d_numneigh[j];
+
+    for (int kk = 0; kk < j_jnum; kk++) {
+      int k = d_neighbors(j,kk);
+      k &= NEIGHMASK;
+      if (k == i) continue;
+      const int ktype = d_map[type[k]];
+      const int jkparam = d_elem2param(jtype,ktype,ktype);
+      const int jikparam = d_elem2param(jtype,itype,ktype);
+
+      delr2[0] = x(k,0) - xtmpj;
+      delr2[1] = x(k,1) - ytmpj;
+      delr2[2] = x(k,2) - ztmpj;
+      const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+
+      if (rsq2 > d_params[jkparam].cutsq) continue;
+      // the per-atom tallies below consume evdwl and fk, which only the full threebody() produces
+      if (vflag_atom || eflag_atom)
+        threebody(d_params[jiparam],d_params[jkparam],d_params[jikparam],
+                  rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
+      else
+        threebodyj(d_params[jiparam],d_params[jkparam],d_params[jikparam],
+                  rsq1,rsq2,delr1,delr2,fj);
+
+      fxtmpi += fj[0];
+      fytmpi += fj[1];
+      fztmpi += fj[2];
+
+      if (EVFLAG)
+        if (vflag_atom || eflag_atom) ev_tally3_atom(ev,i,evdwl,0.0,fj,fk,delr1,delr2);
+    }
+  }
+
+  f(i,0) += fxtmpi;
+  f(i,1) += fytmpi;
+  f(i,2) += fztmpi;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG> dispatch, const int &ii) const {
+  EV_FLOAT discarded;  // the tallying overload needs an accumulator; its contents are dropped here
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch, ii, discarded);
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairSWKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  PairSW::coeff(narg,arg);
+
+  // mirror the parent's map[] (entries 1..ntypes) into a dual view; d_map is its device side
+  const int ntypes = atom->ntypes;
+  DAT::tdual_int_1d k_map("pair:map",ntypes+1);
+  HAT::t_int_1d h_map = k_map.h_view;
+
+  int itype = 1;
+  while (itype <= ntypes) {
+    h_map[itype] = map[itype];
+    itype++;
+  }
+
+  k_map.template modify<LMPHostType>();
+  k_map.template sync<DeviceType>();
+  d_map = k_map.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairSWKokkos<DeviceType>::init_style()
+{
+  PairSW::init_style();
+
+  // the request adjusted below is the last one registered,
+  // i.e. the neigh request made by the parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  NeighRequest *request = neighbor->requests[neighbor->nrequest - 1];
+
+  // record whether this instantiation is the host or the device variant
+
+  const bool device_style = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool host_style = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+  request->kokkos_host = host_style && !device_style;
+  request->kokkos_device = device_style;
+
+  // every supported neighflag requests a full list; only FULL also sets the ghost flag
+
+  const bool supported = (neighflag == FULL) || (neighflag == HALF) || (neighflag == HALFTHREAD);
+  if (!supported) {
+    error->all(FLERR,"Cannot use chosen neighbor list style with pair sw/kk");
+  } else {
+    request->full = 1;
+    request->half = 0;
+    request->full_cluster = 0;
+    request->ghost = (neighflag == FULL) ? 1 : 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairSWKokkos<DeviceType>::setup()
+{
+  PairSW::setup();
+
+  // stage the parent's elem2param[][][] and params[] tables in dual views
+
+  tdual_int_3d k_elem2param("pair:elem2param",nelements,nelements,nelements);
+  tdual_param_1d k_params("pair:params",nparams);
+  t_host_int_3d h_elem2param = k_elem2param.h_view;
+  t_host_param_1d h_params = k_params.h_view;
+
+  for (int iparam = 0; iparam < nparams; iparam++)
+    h_params[iparam] = params[iparam];
+  for (int ielem = 0; ielem < nelements; ielem++)
+    for (int jelem = 0; jelem < nelements; jelem++)
+      for (int kelem = 0; kelem < nelements; kelem++)
+        h_elem2param(ielem,jelem,kelem) = elem2param[ielem][jelem][kelem];
+
+  // push the host copies to DeviceType and keep the device-side views
+
+  k_elem2param.template modify<LMPHostType>();
+  k_elem2param.template sync<DeviceType>();
+  d_elem2param = k_elem2param.d_view;
+
+  k_params.template modify<LMPHostType>();
+  k_params.template sync<DeviceType>();
+  d_params = k_params.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::twobody(const Param& param, const F_FLOAT& rsq, F_FLOAT& fforce,
+                     const int& eflag, F_FLOAT& eng) const
+{
+  // two-body term at squared distance rsq: always writes fforce, writes eng only if eflag is set
+
+  const F_FLOAT dist = sqrt(rsq);
+  const F_FLOAT inv_rsq = 1.0/rsq;
+  const F_FLOAT pow_p = pow(dist,-param.powerp);
+  const F_FLOAT pow_q = pow(dist,-param.powerq);
+  const F_FLOAT inv_ra = 1.0 / (dist - param.cut);
+  const F_FLOAT inv_ra_sq_r = inv_ra*inv_ra*dist;
+  const F_FLOAT exp_term = exp(param.sigma * inv_ra);
+  fforce = (param.c1*pow_p - param.c2*pow_q +
+            (param.c3*pow_p -param.c4*pow_q) * inv_ra_sq_r) * exp_term * inv_rsq;
+  if (eflag) eng = (param.c5*pow_p - param.c6*pow_q) * exp_term;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::threebody(const Param& paramij, const Param& paramik, const Param& paramijk,
+                       const F_FLOAT& rsq1, const F_FLOAT& rsq2,
+                       F_FLOAT *delr1, F_FLOAT *delr2,
+                       F_FLOAT *fj, F_FLOAT *fk, const int& eflag, F_FLOAT& eng) const
+{
+  // Three-body term for one triplet.
+  //   paramij, paramik  supply cut and sigma_gamma for the two bonds
+  //   paramijk          supplies costheta, lambda_epsilon and lambda_epsilon2
+  //   rsq1, rsq2        squared lengths passed alongside the bond vectors delr1, delr2
+  //   fj, fk            output force vectors (3 components each)
+  //   eng               output energy, written only when eflag is set
+
+  // radial factors of the first bond
+
+  const F_FLOAT len1 = sqrt(rsq1);
+  const F_FLOAT inv_rsq1 = 1.0/rsq1;
+  const F_FLOAT inv_ra1 = 1.0/(len1 - paramij.cut);
+  const F_FLOAT gs_ra1 = paramij.sigma_gamma * inv_ra1;
+  const F_FLOAT gs_ra1_sq = gs_ra1*inv_ra1/len1;
+  const F_FLOAT exp_gs1 = exp(gs_ra1);
+
+  // radial factors of the second bond
+
+  const F_FLOAT len2 = sqrt(rsq2);
+  const F_FLOAT inv_rsq2 = 1.0/rsq2;
+  const F_FLOAT inv_ra2 = 1.0/(len2 - paramik.cut);
+  const F_FLOAT gs_ra2 = paramik.sigma_gamma * inv_ra2;
+  const F_FLOAT gs_ra2_sq = gs_ra2*inv_ra2/len2;
+  const F_FLOAT exp_gs2 = exp(gs_ra2);
+
+  // cosine of the angle between the bonds and its offset from costheta
+
+  const F_FLOAT inv_len12 = 1.0/(len1*len2);
+  const F_FLOAT cos_angle = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * inv_len12;
+  const F_FLOAT dcos = cos_angle - paramijk.costheta;
+  const F_FLOAT dcos_sq = dcos*dcos;
+  const F_FLOAT exp_both = exp_gs1*exp_gs2;
+
+  // the prefactors come from the ijk entry only, not from a mix of ij and ik
+
+  const F_FLOAT e_rad = paramijk.lambda_epsilon * exp_both*dcos_sq;
+  const F_FLOAT f_rad1 = e_rad*gs_ra1_sq;
+  const F_FLOAT f_rad2 = e_rad*gs_ra2_sq;
+  const F_FLOAT f_ang = paramijk.lambda_epsilon2 * exp_both*dcos;
+  const F_FLOAT f_ang12 = inv_len12*f_ang;
+  const F_FLOAT cos_f_ang = cos_angle*f_ang;
+  const F_FLOAT cs_fac1 = inv_rsq1*cos_f_ang;
+  const F_FLOAT cs_fac2 = inv_rsq2*cos_f_ang;
+
+  for (int d = 0; d < 3; d++) fj[d] = delr1[d]*(f_rad1+cs_fac1)-delr2[d]*f_ang12;
+  for (int d = 0; d < 3; d++) fk[d] = delr2[d]*(f_rad2+cs_fac2)-delr1[d]*f_ang12;
+
+  if (eflag) eng = e_rad;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::threebodyj(const Param& paramij, const Param& paramik, const Param& paramijk,
+                       const F_FLOAT& rsq1, const F_FLOAT& rsq2, F_FLOAT *delr1, F_FLOAT *delr2, F_FLOAT *fj) const
+{
+  // Reduced form of threebody(): the same arithmetic for fj, but neither fk
+  // nor the energy is produced.
+
+  // radial factors of the first bond
+
+  const F_FLOAT len1 = sqrt(rsq1);
+  const F_FLOAT inv_rsq1 = 1.0/rsq1;
+  const F_FLOAT inv_ra1 = 1.0/(len1 - paramij.cut);
+  const F_FLOAT gs_ra1 = paramij.sigma_gamma * inv_ra1;
+  const F_FLOAT gs_ra1_sq = gs_ra1*inv_ra1/len1;
+  const F_FLOAT exp_gs1 = exp(gs_ra1);
+
+  // second bond: only its length and exponential factor are needed
+
+  const F_FLOAT len2 = sqrt(rsq2);
+  const F_FLOAT inv_ra2 = 1.0/(len2 - paramik.cut);
+  const F_FLOAT gs_ra2 = paramik.sigma_gamma * inv_ra2;
+  const F_FLOAT exp_gs2 = exp(gs_ra2);
+
+  // cosine of the angle between the bonds and its offset from costheta
+
+  const F_FLOAT inv_len12 = 1.0/(len1*len2);
+  const F_FLOAT cos_angle = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * inv_len12;
+  const F_FLOAT dcos = cos_angle - paramijk.costheta;
+  const F_FLOAT dcos_sq = dcos*dcos;
+  const F_FLOAT exp_both = exp_gs1*exp_gs2;
+
+  // the prefactors come from the ijk entry only, not from a mix of ij and ik
+
+  const F_FLOAT e_rad = paramijk.lambda_epsilon * exp_both*dcos_sq;
+  const F_FLOAT f_rad1 = e_rad*gs_ra1_sq;
+  const F_FLOAT f_ang = paramijk.lambda_epsilon2 * exp_both*dcos;
+  const F_FLOAT f_ang12 = inv_len12*f_ang;
+  const F_FLOAT cos_f_ang = cos_angle*f_ang;
+  const F_FLOAT cs_fac1 = inv_rsq1*cos_f_ang;
+
+  for (int d = 0; d < 3; d++) fj[d] = delr1[d]*(f_rad1+cs_fac1)-delr2[d]*f_ang12;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  // Tally one two-body interaction between atoms i and j.
+  //   ev              reduction accumulator; only ev.v[] is updated here
+  //   epair           pair energy, credited in halves
+  //   fpair           scalar that multiplies the products of delx, dely, delz
+  //   delx,dely,delz  separation components supplied by the caller
+  // NEIGHFLAG == FULL: only atom i receives per-atom shares and the global
+  // virial is added with weight 0.5.  Any other NEIGHFLAG: atom j receives
+  // the same shares as atom i and the global virial is added in full.
+
+  // per-atom arrays, re-wrapped with the memory traits chosen by AtomicF<NEIGHFLAG>
+
+  typedef Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> tally_traits;
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,tally_traits> v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,tally_traits> v_vatom = k_vatom.view<DeviceType>();
+
+  // per-atom energy
+
+  if (eflag_atom) {
+    const E_FLOAT half_energy = 0.5 * epair;
+    v_eatom[i] += half_energy;
+    if (NEIGHFLAG != FULL) v_eatom[j] += half_energy;
+  }
+
+  // nothing else to do unless some virial tally is active
+
+  if (!vflag_either) return;
+
+  // virial products in the order xx, yy, zz, xy, xz, yz
+
+  E_FLOAT w[6];
+  w[0] = delx*delx*fpair;
+  w[1] = dely*dely*fpair;
+  w[2] = delz*delz*fpair;
+  w[3] = delx*dely*fpair;
+  w[4] = delx*delz*fpair;
+  w[5] = dely*delz*fpair;
+
+  // global virial
+
+  if (vflag_global) {
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) ev.v[n] += w[n];
+    } else {
+      for (int n = 0; n < 6; n++) ev.v[n] += 0.5*w[n];
+    }
+  }
+
+  // per-atom virial
+
+  if (vflag_atom) {
+    for (int n = 0; n < 6; n++) {
+      v_vatom(i,n) += 0.5*w[n];
+    }
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) {
+        v_vatom(j,n) += 0.5*w[n];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into global and per-atom accumulators
+   called by SW and hbond potentials, newton_pair is always on
+   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
+ ------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &j, int &k,
+          const F_FLOAT &evdwl, const F_FLOAT &ecoul,
+                     F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const
+{
+  // Tally one three-body interaction among atoms i, j and k.
+  //   ev           reduction accumulator; only ev.v[] is updated here
+  //   i,j,k        atom indices; atom i is credited in every case
+  //   evdwl,ecoul  energies; their sum is credited in thirds
+  //   fj,fk        force vectors, paired with drji and drki in the virial
+  // NEIGHFLAG == FULL: only atom i receives per-atom shares.
+  // Any other NEIGHFLAG: atoms j and k receive the same thirds as atom i.
+
+  // per-atom arrays, re-wrapped with the memory traits chosen by AtomicF<NEIGHFLAG>
+
+  typedef Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> tally_traits;
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,tally_traits> v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,tally_traits> v_vatom = k_vatom.view<DeviceType>();
+
+  if (eflag_atom) {
+    const F_FLOAT third_energy = THIRD * (evdwl + ecoul);
+    v_eatom[i] += third_energy;
+    if (NEIGHFLAG != FULL) {
+      v_eatom[j] += third_energy;
+      v_eatom[k] += third_energy;
+    }
+  }
+
+  // nothing else to do unless some virial tally is active
+
+  if (!vflag_either) return;
+
+  // drji*fj + drki*fk, components ordered xx, yy, zz, xy, xz, yz
+
+  F_FLOAT w[6];
+  w[0] = drji[0]*fj[0] + drki[0]*fk[0];
+  w[1] = drji[1]*fj[1] + drki[1]*fk[1];
+  w[2] = drji[2]*fj[2] + drki[2]*fk[2];
+  w[3] = drji[0]*fj[1] + drki[0]*fk[1];
+  w[4] = drji[0]*fj[2] + drki[0]*fk[2];
+  w[5] = drji[1]*fj[2] + drki[1]*fk[2];
+
+  // global virial: added in full regardless of NEIGHFLAG
+  if (vflag_global) {
+    for (int n = 0; n < 6; n++) ev.v[n] += w[n];
+  }
+
+  // per-atom virial
+  if (vflag_atom) {
+    for (int n = 0; n < 6; n++) v_vatom(i,n) += THIRD*w[n];
+
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) v_vatom(j,n) += THIRD*w[n];
+      for (int n = 0; n < 6; n++) v_vatom(k,n) += THIRD*w[n];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into global and per-atom accumulators
+   called by SW and hbond potentials, newton_pair is always on
+   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
+ ------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairSWKokkos<DeviceType>::ev_tally3_atom(EV_FLOAT &ev, const int &i,
+          const F_FLOAT &evdwl, const F_FLOAT &ecoul,
+                     F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const
+{
+  // Credit atom i with its third of a three-body term: energy into d_eatom,
+  // virial into d_vatom.  ev is part of the signature but is not modified.
+
+  if (eflag_atom) {
+    const F_FLOAT epairthird = THIRD * (evdwl + ecoul);
+    d_eatom[i] += epairthird;
+  }
+
+  // The virial products are used by the per-atom tally only, so they are
+  // formed only when that tally is on.  This also keeps fk from being read
+  // when the caller produced fj alone (threebodyj) and never filled fk.
+  if (vflag_either && vflag_atom) {
+    F_FLOAT v[6];
+    v[0] = drji[0]*fj[0] + drki[0]*fk[0];
+    v[1] = drji[1]*fj[1] + drki[1]*fk[1];
+    v[2] = drji[2]*fj[2] + drki[2]*fk[2];
+    v[3] = drji[0]*fj[1] + drki[0]*fk[1];
+    v[4] = drji[0]*fj[2] + drki[0]*fk[2];
+    v[5] = drji[1]*fj[2] + drki[1]*fk[2];
+    d_vatom(i,0) += THIRD*v[0]; d_vatom(i,1) += THIRD*v[1];
+    d_vatom(i,2) += THIRD*v[2]; d_vatom(i,3) += THIRD*v[3];
+    d_vatom(i,4) += THIRD*v[4]; d_vatom(i,5) += THIRD*v[5];
+  }
+}
+
+template class PairSWKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#ifdef KOKKOS_HAVE_CUDA  // the host variant is instantiated separately only when this macro is defined
+template class PairSWKokkos<LMPHostType>;  // presumably LMPHostType differs from LMPDeviceType only in CUDA builds - confirm
+#endif
\ No newline at end of file
diff --git a/src/KOKKOS/pair_sw_kokkos.h b/src/KOKKOS/pair_sw_kokkos.h
new file mode 100755
index 0000000000..a73008ee1c
--- /dev/null
+++ b/src/KOKKOS/pair_sw_kokkos.h
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(sw/kk,PairSWKokkos<LMPDeviceType>)
+PairStyle(sw/kk/device,PairSWKokkos<LMPDeviceType>)
+PairStyle(sw/kk/host,PairSWKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_SW_KOKKOS_H
+#define LMP_PAIR_SW_KOKKOS_H
+
+#include "pair_sw.h"
+#include "pair_kokkos.h"
+
+// Empty dispatch tags: each one selects a PairSWKokkos::operator() overload and
+// carries the neighbor-list flag and the tally on/off switch as template arguments.
+
+template<int NEIGHFLAG, int EVFLAG> struct TagPairSWComputeHalf {};
+
+template<int NEIGHFLAG, int EVFLAG> struct TagPairSWComputeFullA {};
+
+template<int NEIGHFLAG, int EVFLAG> struct TagPairSWComputeFullB {};
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairSWKokkos : public PairSW {  // PairSW whose compute kernels run through Kokkos on DeviceType
+ public:
+  enum {EnabledNeighFlags=FULL};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+
+  PairSWKokkos(class LAMMPS *);
+  virtual ~PairSWKokkos();
+  virtual void compute(int, int);
+  virtual void coeff(int, char **);
+  virtual void init_style();
+  // kernel entry points: compute() uses the EV_FLOAT& overloads with parallel_reduce, the others with parallel_for
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
+  // energy/virial tally helpers called from inside the kernels
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally3(EV_FLOAT &ev, const int &i, const int &j, int &k, 
+            const F_FLOAT &evdwl, const F_FLOAT &ecoul,
+                       F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally3_atom(EV_FLOAT &ev, const int &i, 
+            const F_FLOAT &evdwl, const F_FLOAT &ecoul,
+                       F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const;
+
+ protected:
+  typedef Kokkos::DualView<int***,DeviceType> tdual_int_3d;
+  typedef typename tdual_int_3d::t_dev_const_randomread t_int_3d_randomread;
+  typedef typename tdual_int_3d::t_host t_host_int_3d;
+
+  t_int_3d_randomread d_elem2param;  // filled by setup() from elem2param
+  DAT::t_int_1d_randomread d_map;  // filled by coeff() from map
+
+  typedef Kokkos::DualView<Param*,DeviceType> tdual_param_1d;
+  typedef typename tdual_param_1d::t_dev t_param_1d;
+  typedef typename tdual_param_1d::t_host t_host_param_1d;
+
+  t_param_1d d_params;  // filled by setup() from params
+
+  virtual void setup();
+  KOKKOS_INLINE_FUNCTION  // same qualifier as the definitions, which are called from inside the kernels
+  void twobody(const Param&, const F_FLOAT&, F_FLOAT&, const int&, F_FLOAT&) const;
+  KOKKOS_INLINE_FUNCTION
+  void threebody(const Param&, const Param&, const Param&, const F_FLOAT&, const F_FLOAT&, F_FLOAT *, F_FLOAT *,
+                 F_FLOAT *, F_FLOAT *, const int&, F_FLOAT&) const;
+  KOKKOS_INLINE_FUNCTION
+  void threebodyj(const Param&, const Param&, const Param&, const F_FLOAT&, const F_FLOAT&, F_FLOAT *, F_FLOAT *,
+                 F_FLOAT *) const;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_tagint_1d tag;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+  // NOTE(review): the three d_type2* views below are not referenced in pair_sw_kokkos.cpp - confirm before removing
+  DAT::t_int_1d_randomread d_type2frho;
+  DAT::t_int_2d_randomread d_type2rhor;
+  DAT::t_int_2d_randomread d_type2z2r;
+
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+  int inum;  // NOTE(review): compute() declares its own local inum; this member is not assigned there
+  friend void pair_virial_fdotr_compute<PairSWKokkos>(PairSWKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp
new file mode 100755
index 0000000000..d32e18e66f
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_kokkos.cpp
@@ -0,0 +1,1202 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Ray Shan (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_tersoff_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "neigh_list_kokkos.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+// NOTE(review): presumably CUDA launch-bound hints (threads per block and
+// minimum resident blocks).  Neither macro is referenced in the portion of
+// this file that was reviewed -- confirm where they are consumed.
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+// Build the Kokkos Tersoff pair style on top of the plain PairTersoff:
+// set the THIRD constant, switch off rRESPA support, cache the atom
+// pointer as an AtomKokkos and record which per-atom fields compute()
+// reads (datamask_read) and modifies (datamask_modify).
+template<class DeviceType>
+PairTersoffKokkos<DeviceType>::PairTersoffKokkos(LAMMPS *lmp) : PairTersoff(lmp)
+{
+  THIRD = 1.0/3.0;
+
+  respa_enable = 0;
+
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  // compute() takes a view of atomKK->k_tag and the half-list kernel reads
+  // tag(i)/tag(j), so tags must be part of the mask handed to atomKK->sync()
+  datamask_read = X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free the per-atom energy and virial arrays.  Nothing is released while
+// copymode is set; compute() sets it around the kernel launches that are
+// handed *this.
+template<class DeviceType>
+PairTersoffKokkos<DeviceType>::~PairTersoffKokkos()
+{
+  if (copymode) return;
+
+  memory->destroy_kokkos(k_eatom,eatom);
+  memory->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run the parent allocation, then create the (ntypes+1)^3 parameter table
+// k_params and point paramskk at its device view.
+template<class DeviceType>
+void PairTersoffKokkos<DeviceType>::allocate()
+{
+  typedef Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType>
+    tdual_params_3d;
+
+  PairTersoff::allocate();
+
+  // one extra slot per dimension so the table can be indexed by 1-based type
+  const int extent = atom->ntypes + 1;
+
+  k_params = tdual_params_3d("PairTersoff::paramskk",extent,extent,extent);
+  paramskk = k_params.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+// Run the parent init, then adjust the neighbor request it issued: mark it
+// as a Kokkos host or device list according to DeviceType and ask for a
+// full list.  Any neighflag other than FULL, HALF or HALFTHREAD is an error.
+template<class DeviceType>
+void PairTersoffKokkos<DeviceType>::init_style()
+{
+  PairTersoff::init_style();
+
+  neighflag = lmp->kokkos->neighflag;
+
+  // the request made by the parent class is the most recent one
+  NeighRequest *request = neighbor->requests[neighbor->nrequest - 1];
+
+  const bool is_host_type = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+  const bool is_device_type = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+
+  request->kokkos_host = is_host_type && !is_device_type;
+  request->kokkos_device = is_device_type;
+
+  const bool supported =
+    (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD);
+
+  if (!supported) {
+    error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/kk");
+    return;
+  }
+
+  // all supported styles work from a full list; only FULL also requests
+  // neighbors of ghost atoms
+  request->full = 1;
+  request->half = 0;
+  request->full_cluster = 0;
+  request->ghost = (neighflag == FULL) ? 1 : 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run the parent setup, then mirror the Tersoff parameters into the host
+// side of k_params so the kernels can look them up by 1-based atom type
+// triplet (i,j,k), and flag the host copy as modified.
+//
+// elem2param is indexed by element, so every atom type is translated
+// through map[] first.  Indexing it with type-1 is only correct when type
+// t maps to element t-1, and reads out of bounds when several types share
+// an element.  Types whose map entry is negative have no element assigned
+// and are skipped; their k_params slots keep their allocated contents.
+template<class DeviceType>
+void PairTersoffKokkos<DeviceType>::setup()
+{
+  PairTersoff::setup();
+
+  const int n = atom->ntypes;
+
+  for (int i = 1; i <= n; i++) {
+    const int ielem = map[i];
+    if (ielem < 0) continue;
+
+    for (int j = 1; j <= n; j++) {
+      const int jelem = map[j];
+      if (jelem < 0) continue;
+
+      for (int k = 1; k <= n; k++) {
+        const int kelem = map[k];
+        if (kelem < 0) continue;
+
+        const int m = elem2param[ielem][jelem][kelem];
+        params_ters &dst = k_params.h_view(i,j,k);
+
+        dst.powerm = params[m].powerm;
+        dst.gamma = params[m].gamma;
+        dst.lam3 = params[m].lam3;
+        dst.c = params[m].c;
+        dst.d = params[m].d;
+        dst.h = params[m].h;
+        dst.powern = params[m].powern;
+        dst.beta = params[m].beta;
+        dst.lam2 = params[m].lam2;
+        dst.bigb = params[m].bigb;
+        dst.bigr = params[m].bigr;
+        dst.bigd = params[m].bigd;
+        dst.lam1 = params[m].lam1;
+        dst.biga = params[m].biga;
+        dst.cutsq = params[m].cutsq;
+        dst.c1 = params[m].c1;
+        dst.c2 = params[m].c2;
+        dst.c3 = params[m].c3;
+        dst.c4 = params[m].c4;
+      }
+    }
+  }
+
+  // the device copy is refreshed by the sync in compute()
+  k_params.template modify<LMPHostType>();
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Compute the Tersoff interaction for the current neighbor list by
+// launching the kernel(s) selected by neighflag, then fold the reduced
+// energy/virial into eng_vdwl / virial and sync per-atom results to host.
+//   eflag_in, vflag_in : stored in the eflag / vflag members and passed
+//                        to ev_setup()
+template<class DeviceType>
+void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  // NOTE(review): presumably makes ev_setup() leave vflag_fdotr off for the
+  // FULL style, so the virial comes from the tallies instead -- confirm
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+
+  // bring atom data and the parameter table up to date in the execution
+  // space, and declare up front which fields the kernels will change
+  atomKK->sync(execution_space,datamask_read);
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  // cache the views and counters the kernels read through *this
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+
+  // inum bounds the Half and FullA launches; ignum = inum + gnum bounds the
+  // FullB launch
+  const int inum = list->inum;
+  const int ignum = inum + list->gnum;
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+
+  k_list->clean_copy();
+  // *this is handed to the launches below; the destructor frees nothing
+  // while copymode is set
+  copymode = 1;
+
+  // ev receives the result of each reduction, ev_all sums over launches
+  EV_FLOAT ev;
+  EV_FLOAT ev_all;
+
+  // with evflag set the reducing operator() overload runs (EVFLAG=1),
+  // otherwise the plain parallel_for overload (EVFLAG=0)
+  if (neighflag == HALF) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == HALFTHREAD) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == FULL) {
+    // FULL runs two passes: FullA over the inum entries, then FullB over
+    // inum + gnum entries; both only write f(i,*) of their own atom
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+    
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  }
+
+  // fold the reduced totals into the global accumulators
+  if (eflag_global) eng_vdwl += ev_all.evdwl;
+  if (vflag_global) {
+    virial[0] += ev_all.v[0];
+    virial[1] += ev_all.v[1];
+    virial[2] += ev_all.v[2];
+    virial[3] += ev_all.v[3];
+    virial[4] += ev_all.v[4];
+    virial[5] += ev_all.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  // per-atom tallies were written in the execution space; mark them
+  // modified there and copy them back to the host
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Force/energy kernel for the HALF and HALFTHREAD styles, run once per
+// entry ii of d_ilist.  All forces on i, j and k produced by atom i's
+// bonds are accumulated here through a_f, so there is no second pass.
+//   ii : index into d_ilist
+//   ev : energy/virial accumulator of the reduction
+//
+// Steps for atom i:
+//   1. repulsive pair term, each i-j pair handled by exactly one of the
+//      two atoms (selected via their tags)
+//   2. for every neighbor j within the cutoff: bond-order sum over k,
+//      attractive pair term, then the three-body forces on i, j and k
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // The f array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  const int i = d_ilist[ii];
+  if (i >= nlocal) return;
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  // tag is a view of tagint, which is a separate typedef from int and may
+  // be wider; keep the full width so the ordering test and itag+jtag below
+  // are not evaluated on truncated values
+  const tagint itag = tag(i);
+  tagint jtag;
+
+  int j,k,jj,kk,jtype,ktype;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+    jtag = tag(j);
+
+    // the list holds both (i,j) and (j,i): pick one of them from the tag
+    // order and the parity of the tag sum; equal tags fall back to
+    // comparing coordinates
+    if (itag > jtag) {
+      if ((itag+jtag) % 2 == 0) continue;
+    } else if (itag < jtag) {
+      if ((itag+jtag) % 2 == 1) continue;
+    } else {
+      if (x(j,2)  < ztmp) continue;
+      if (x(j,2) == ztmp && x(j,1)  < ytmp) continue;
+      if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
+    }
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp *
+                          (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
+    const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+
+    // equal and opposite force on the two atoms of the pair
+    a_f(i,0) += delx*frep;
+    a_f(i,1) += dely*frep;
+    a_f(i,2) += delz*frep;
+    a_f(j,0) -= delx*frep;
+    a_f(j,1) -= dely*frep;
+    a_f(j,2) -= delz*frep;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+
+  // no tag filter here: every neighbor j of i within the cutoff is visited
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    // sum the contribution of every other neighbor k of i
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    // prefactor scales the three-body forces computed below
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    a_f(i,0) += delx1*fatt;
+    a_f(i,1) += dely1*fatt;
+    a_f(i,2) += delz1*fatt;
+    a_f(j,0) -= delx1*fatt;
+    a_f(j,1) -= dely1*fatt;
+    a_f(j,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+                rik,delx2,dely2,delz2,fi,fj,fk);
+
+      a_f(i,0) += fi[0];
+      a_f(i,1) += fi[1];
+      a_f(i,2) += fi[2];
+      a_f(j,0) += fj[0];
+      a_f(j,1) += fj[1];
+      a_f(j,2) += fj[2];
+      a_f(k,0) += fk[0];
+      a_f(k,1) += fk[1];
+      a_f(k,2) += fk[2];
+
+      if (vflag_atom) {
+        // v_tally3 is handed the displacements with the opposite sign of
+        // delx1.. / delx2..
+        F_FLOAT delrij[3], delrik[3];
+        delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+        delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+        if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+// Overload without a reduction argument (the form compute() launches via
+// parallel_for): forwards to the reducing overload with a local
+// accumulator whose contents are dropped.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  typedef TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG> dispatch_tag;
+
+  EV_FLOAT ev_discarded;
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch_tag(), ii, ev_discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// First pass of the FULL style, run once per entry ii of d_ilist.  Only
+// f(i,*) of the atom's own row is written: the repulsive and attractive
+// pair terms and the fi part of the three-body force for bonds i-j.
+// The fj/fk parts returned by ters_dthb are not applied to f here.
+//   ii : index into d_ilist
+//   ev : energy/virial accumulator of the reduction
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive
+
+  // every neighbor is visited (no tag filter), so each pair is seen from
+  // both of its atoms; only i's force is written and the energy is halved
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    // this loop-local jtype shadows the function-scope jtype declared above
+    const int jtype = type(j);
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * 
+	    		  (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
+    const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+
+    f(i,0) += delx*frep;
+    f(i,1) += dely*frep;
+    f(i,2) += delz*frep;
+
+    if (EVFLAG) {
+      if (eflag)
+        ev.evdwl += 0.5*eng;
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+ 
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+    
+    // sum the contribution of every other neighbor k of i
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    // prefactor scales the three-body forces computed below
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    f(i,0) += delx1*fatt;
+    f(i,1) += dely1*fatt;
+    f(i,2) += delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += 0.5*eng;
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+    
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+		rik,delx2,dely2,delz2,fi,fj,fk);
+
+      // only the part acting on atom i is applied in this pass
+      f(i,0) += fi[0];
+      f(i,1) += fi[1];
+      f(i,2) += fi[2];
+
+      // NOTE(review): the tally is gated on vflag_atom, so with only a
+      // global virial requested v_tally3 is never reached here -- confirm
+      // against v_tally3 whether it is meant to feed the global virial too
+      if (vflag_atom) {
+	F_FLOAT delrij[3], delrik[3];
+	delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+	delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+	if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+// Overload without a reduction argument (the form compute() launches via
+// parallel_for): forwards to the reducing overload with a local
+// accumulator whose contents are dropped.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  typedef TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG> dispatch_tag;
+
+  EV_FLOAT ev_discarded;
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch_tag(), ii, ev_discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Second pass of the FULL style, run once per entry ii of d_ilist
+// (compute() launches it over inum + gnum entries).  For every local
+// neighbor j of i it re-evaluates the bonds centred on j and adds to
+// f(i,*) the forces those bonds exert on atom i.  Only f(i,*) is written.
+//   ii : index into d_ilist
+//   ev : energy/virial accumulator of the reduction
+//
+// Inside the loops the roles are swapped with respect to the first pass:
+// j is the central atom, i the bonded partner and k runs over j's own
+// neighbor list.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype,j_jnum;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  const int jnum = d_numneigh[i];
+
+  // attractive: bond order
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    // only local atoms are used as bond centres in this pass
+    if (j >= nlocal) continue;
+    jtype = type(j);
+
+    // displacement taken from j's side: r_j - r_i
+    delx1 = x(j,0) - xtmp;
+    dely1 = x(j,1) - ytmp;
+    delz1 = x(j,2) - ztmp;
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(jtype,itype,itype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    j_jnum = d_numneigh[j];
+
+    for (kk = 0; kk < j_jnum; kk++) {
+      // strip the special-bond bits before comparing with i, as is done
+      // for j above; otherwise an entry for atom i that carries those bits
+      // would not be recognised and i would act as its own third body
+      k = d_neighbors(j,kk) & NEIGHMASK;
+      if (k == i) continue;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
+    const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
+    const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    // i is the partner atom of this bond, hence the minus sign
+    f(i,0) -= delx1*fatt;
+    f(i,1) -= dely1*fatt;
+    f(i,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag)
+        ev.evdwl += 0.5 * eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+
+    for (kk = 0; kk < j_jnum; kk++) {
+      // mask first, then compare (see the bond-order loop above)
+      k = d_neighbors(j,kk) & NEIGHMASK;
+      if (k == i) continue;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+
+      // bond j-i with third body k: atom i is the "j" slot of ters_dthbj,
+      // so the fj output is the force on i
+      ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
+                 rik,delx2,dely2,delz2,fj,fk);
+      f(i,0) += fj[0];
+      f(i,1) += fj[1];
+      f(i,2) += fj[2];
+
+      if (vflag_atom) {
+        F_FLOAT delrji[3], delrjk[3];
+        delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
+        delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
+        if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
+      }
+
+      // bond j-k with third body i: atom i is the "k" slot of ters_dthbk,
+      // so the fk output (overwritten here) is the force on i
+      const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
+      const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
+      ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
+                 rij,delx1,dely1,delz1,fk);
+      f(i,0) += fk[0];
+      f(i,1) += fk[1];
+      f(i,2) += fk[2];
+    }
+  }
+}
+
+// Overload without a reduction argument (the form compute() launches via
+// parallel_for): forwards to the reducing overload with a local
+// accumulator whose contents are dropped.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  typedef TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG> dispatch_tag;
+
+  EV_FLOAT ev_discarded;
+  this->template operator()<NEIGHFLAG,EVFLAG>(dispatch_tag(), ii, ev_discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Cutoff function for parameter set (i,j,k) at distance r:
+// 1 below bigr-bigd, 0 above bigr+bigd, and a sine ramp in between.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+  const F_FLOAT centre = p.bigr;
+  const F_FLOAT halfwidth = p.bigd;
+
+  if (r < centre-halfwidth) {
+    return 1.0;
+  } else if (r > centre+halfwidth) {
+    return 0.0;
+  }
+
+  return 0.5*(1.0 - sin(MY_PI2*(r - centre)/halfwidth));
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Derivative of the cutoff function with respect to r for parameter set
+// (i,j,k): zero outside [bigr-bigd, bigr+bigd], a cosine term inside.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+  const F_FLOAT centre = p.bigr;
+  const F_FLOAT halfwidth = p.bigd;
+
+  // the cutoff function is constant on both sides of the ramp
+  if (r < centre-halfwidth || r > centre+halfwidth) return 0.0;
+
+  return -(MY_PI4/halfwidth) * cos(MY_PI2*(r - centre)/halfwidth);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// One term of the bond-order sum (callers accumulate it into bo_ij):
+//   fc(rik) * g(cos theta) * exp(arg)
+// where cos theta comes from the two displacement vectors and arg is
+// lam3*(rij-rik), cubed when powerm is 3.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
+
+  const F_FLOAT arg = (int(p.powerm) == 3)
+    ? pow(p.lam3 * (rij-rik),3.0)
+    : p.lam3 * (rij-rik);
+
+  // clamp the exponential; 69.0776 is about ln(1e30)
+  F_FLOAT ex_delr;
+  if (arg > 69.0776) {
+    ex_delr = 1.e30;
+  } else if (arg < -69.0776) {
+    ex_delr = 0.0;
+  } else {
+    ex_delr = exp(arg);
+  }
+
+  return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Angular function for parameter set (i,j,k), with `cos` the cosine of
+// the bond angle:
+//   gamma * (1 + c^2/d^2 - c^2/(d^2 + (h - cos)^2))
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::
+	ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT c_sq = p.c * p.c;
+  const F_FLOAT d_sq = p.d * p.d;
+  const F_FLOAT h_minus_cos = p.h - cos;
+
+  return p.gamma*(1.0 + c_sq/d_sq - c_sq/(d_sq+h_minus_cos*h_minus_cos));
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Derivative of the angular function with respect to `cos` for parameter
+// set (i,j,k):
+//   gamma * (-2 c^2 (h - cos)) / (d^2 + (h - cos)^2)^2
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::
+	ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT c_sq = p.c * p.c;
+  const F_FLOAT d_sq = p.d * p.d;
+  const F_FLOAT h_minus_cos = p.h - cos;
+
+  const F_FLOAT top = -2.0 * c_sq * h_minus_cos;
+  // reciprocal of the denominator, applied twice below
+  const F_FLOAT inv_bottom = 1.0/(d_sq + h_minus_cos*h_minus_cos);
+
+  return p.gamma * top * inv_bottom * inv_bottom;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Attractive pair function for parameter set (i,j,k):
+//   -bigb * exp(-lam2 * r) * fc(r), and 0 beyond bigr + bigd.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+
+  const bool beyond_cutoff = (r > p.bigr + p.bigd);
+  if (beyond_cutoff) return 0.0;
+
+  return -p.bigb * exp(-p.lam2 * r)
+    * ters_fc_k(i,j,k,r);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Companion of ters_fa_k for parameter set (i,j,k):
+//   bigb * exp(-lam2 * r) * (lam2 * fc(r) - dfc(r)), and 0 beyond bigr + bigd.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+
+  const bool beyond_cutoff = (r > p.bigr + p.bigd);
+  if (beyond_cutoff) return 0.0;
+
+  return p.bigb * exp(-p.lam2 * r) *
+    (p.lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r));
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Bond-order function b(bo) for parameter set (i,j,k).  With
+// arg = beta*bo and n = powern the general form is
+//   (1 + arg^n)^(-1/(2n))
+// and the thresholds c1..c4 select cheaper expressions for large and
+// small arg.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &bo) const 
+{
+  const params_ters &p = paramskk(i,j,k);
+  const F_FLOAT arg = p.beta * bo;
+
+  // large arg
+  if (arg > p.c1) {
+    return 1.0/sqrt(arg);
+  } else if (arg > p.c2) {
+    return (1.0 - pow(arg,-p.powern) / (2.0*p.powern))/sqrt(arg);
+  }
+
+  // small arg
+  if (arg < p.c4) {
+    return 1.0;
+  } else if (arg < p.c3) {
+    return 1.0 - pow(arg,p.powern)/(2.0*p.powern);
+  }
+
+  // general case
+  return pow(1.0 + pow(arg,p.powern), -1.0/(2.0*p.powern));
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Derivative of ters_bij_k with respect to bo for parameter set (i,j,k).
+// Each branch is the derivative of the matching branch of ters_bij_k,
+// with tmp = beta*bo and n = powern:
+//   tmp > c1 : b = tmp^(-1/2)
+//              db/dbo = beta * (-1/2) tmp^(-3/2)
+//   tmp > c2 : b = (1 - tmp^(-n)/(2n)) * tmp^(-1/2)
+//              db/dbo = beta * (-1/2) tmp^(-3/2) * (1 - (1 + 1/(2n)) tmp^(-n))
+//   tmp < c4 : b = 1
+//              db/dbo = 0
+//   tmp < c3 : b = 1 - tmp^n/(2n)
+//              db/dbo = -1/2 * beta * tmp^(n-1)
+//   else     : b = (1 + tmp^n)^(-1/(2n))
+//              db/dbo = -1/2 * (1 + tmp^n)^(-1 - 1/(2n)) * tmp^n / bo
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
+		const int &k, const F_FLOAT &bo) const 
+{
+  const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
+  if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5);
+  if (tmp > paramskk(i,j,k).c2)
+    // the second term carries (1 + 1/(2n)) with no additional factor 0.5;
+    // an extra 0.5 there does not match the derivative of ters_bij_k
+    return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) *
+           (1.0 - (1.0 +  1.0/(2.0*paramskk(i,j,k).powern)) *
+           pow(tmp,-paramskk(i,j,k).powern)));
+  if (tmp < paramskk(i,j,k).c4) return 0.0;
+  if (tmp < paramskk(i,j,k).c3)
+    return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0);
+
+  const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern);
+  return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Three-body force for parameter set (i,j,k): fills fi, fj and fk with
+// the force vectors on the three atoms, each scaled by prefactor.
+//   rij, (dx1,dy1,dz1) : length and components of the first displacement
+//   rik, (dx2,dy2,dz2) : length and components of the second displacement
+//   fi, fj, fk         : output arrays of length 3
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::ters_dthb(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
+{
+  const params_ters &p = paramskk(i,j,k);
+  const bool cubic = (int(p.powerm) == 3);
+
+  // from PairTersoff::attractive: normalise the two displacement vectors
+  F_FLOAT delrij[3] = {dx1, dy1, dz1};
+  F_FLOAT delrik[3] = {dx2, dy2, dz2};
+  F_FLOAT rij_hat[3], rik_hat[3];
+
+  const F_FLOAT rijinv = 1.0/rij;
+  const F_FLOAT rikinv = 1.0/rik;
+  vec3_scale(rijinv,delrij,rij_hat);
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  // from PairTersoff::ters_zetaterm_d: scalar factors
+  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dfc = ters_dfc(i,j,k,rik);
+
+  const F_FLOAT arg = cubic ? pow(p.lam3 * (rij-rik),3.0)
+                            : p.lam3 * (rij-rik);
+
+  // clamp the exponential; 69.0776 is about ln(1e30)
+  F_FLOAT ex_delr;
+  if (arg > 69.0776) {
+    ex_delr = 1.e30;
+  } else if (arg < -69.0776) {
+    ex_delr = 0.0;
+  } else {
+    ex_delr = exp(arg);
+  }
+
+  const F_FLOAT dex_delr = cubic
+    ? 3.0*pow(p.lam3,3.0) * pow(rij-rik,2.0)*ex_delr
+    : p.lam3 * ex_delr;
+
+  const F_FLOAT costheta = vec3_dot(rij_hat,rik_hat);
+  const F_FLOAT gijk = ters_gijk(i,j,k,costheta);
+  const F_FLOAT dgijk = ters_dgijk(i,j,k,costheta);
+
+  // from PairTersoff::costheta_d: gradients of cos(theta)
+  F_FLOAT dcosfi[3], dcosfj[3], dcosfk[3];
+  vec3_scaleadd(-costheta,rij_hat,rik_hat,dcosfj);
+  vec3_scale(rijinv,dcosfj,dcosfj);
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+  vec3_add(dcosfj,dcosfk,dcosfi);
+  vec3_scale(-1.0,dcosfi,dcosfi);
+
+  // the three scalar weights shared by the force expressions below
+  const F_FLOAT w_cutoff = dfc*gijk*ex_delr;
+  const F_FLOAT w_angle = fc*dgijk*ex_delr;
+  const F_FLOAT w_expo = fc*gijk*dex_delr;
+
+  vec3_scale(-w_cutoff,rik_hat,fi);
+  vec3_scaleadd(w_angle,dcosfi,fi,fi);
+  vec3_scaleadd(w_expo,rik_hat,fi,fi);
+  vec3_scaleadd(-w_expo,rij_hat,fi,fi);
+  vec3_scale(prefactor,fi,fi);
+
+  vec3_scale(w_angle,dcosfj,fj);
+  vec3_scaleadd(w_expo,rij_hat,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+
+  vec3_scale(w_cutoff,rik_hat,fk);
+  vec3_scaleadd(w_angle,dcosfk,fk,fk);
+  vec3_scaleadd(-w_expo,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Variant of ters_dthb that produces only the fj and fk force vectors
+// for parameter set (i,j,k), each scaled by prefactor.
+//   rij, (dx1,dy1,dz1) : length and components of the first displacement
+//   rik, (dx2,dy2,dz2) : length and components of the second displacement
+//   fj, fk             : output arrays of length 3
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::ters_dthbj(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fj, F_FLOAT *fk) const
+{
+  const params_ters &p = paramskk(i,j,k);
+  const bool cubic = (int(p.powerm) == 3);
+
+  // normalise the two displacement vectors
+  F_FLOAT delrij[3] = {dx1, dy1, dz1};
+  F_FLOAT delrik[3] = {dx2, dy2, dz2};
+  F_FLOAT rij_hat[3], rik_hat[3];
+
+  const F_FLOAT rijinv = 1.0/rij;
+  const F_FLOAT rikinv = 1.0/rik;
+  vec3_scale(rijinv,delrij,rij_hat);
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  // scalar factors
+  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dfc = ters_dfc(i,j,k,rik);
+
+  const F_FLOAT arg = cubic ? pow(p.lam3 * (rij-rik),3.0)
+                            : p.lam3 * (rij-rik);
+
+  // clamp the exponential; 69.0776 is about ln(1e30)
+  F_FLOAT ex_delr;
+  if (arg > 69.0776) {
+    ex_delr = 1.e30;
+  } else if (arg < -69.0776) {
+    ex_delr = 0.0;
+  } else {
+    ex_delr = exp(arg);
+  }
+
+  const F_FLOAT dex_delr = cubic
+    ? 3.0*pow(p.lam3,3.0) * pow(rij-rik,2.0)*ex_delr
+    : p.lam3 * ex_delr;
+
+  const F_FLOAT costheta = vec3_dot(rij_hat,rik_hat);
+  const F_FLOAT gijk = ters_gijk(i,j,k,costheta);
+  const F_FLOAT dgijk = ters_dgijk(i,j,k,costheta);
+
+  // gradients of cos(theta); dcosfi is filled in but not read afterwards
+  F_FLOAT dcosfi[3], dcosfj[3], dcosfk[3];
+  vec3_scaleadd(-costheta,rij_hat,rik_hat,dcosfj);
+  vec3_scale(rijinv,dcosfj,dcosfj);
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+  vec3_add(dcosfj,dcosfk,dcosfi);
+  vec3_scale(-1.0,dcosfi,dcosfi);
+
+  // the three scalar weights shared by the force expressions below
+  const F_FLOAT w_cutoff = dfc*gijk*ex_delr;
+  const F_FLOAT w_angle = fc*dgijk*ex_delr;
+  const F_FLOAT w_expo = fc*gijk*dex_delr;
+
+  vec3_scale(w_angle,dcosfj,fj);
+  vec3_scaleadd(w_expo,rij_hat,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+
+  vec3_scale(w_cutoff,rik_hat,fk);
+  vec3_scaleadd(w_angle,dcosfk,fk,fk);
+  vec3_scaleadd(-w_expo,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Three-body force contribution on atom k only.
+
+   i,j,k         indices passed through to paramskk / ters_* helpers
+   prefactor     overall scale applied to the output vector
+   rij, d{x,y,z}1  length and components of the first displacement
+   rik, d{x,y,z}2  length and components of the second displacement
+   fk            output: 3-vector, fully overwritten
+
+   Same arithmetic as the fk part of ters_dthbj; only the
+   direction-cosine derivative needed for fk is formed.
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::ters_dthbk(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fk) const
+{
+  // unit vectors along the two displacements
+  F_FLOAT delrij[3], delrik[3];
+  F_FLOAT rij_hat[3], rik_hat[3];
+
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+
+  const F_FLOAT rijinv = 1.0/rij;
+  vec3_scale(rijinv,delrij,rij_hat);
+
+  const F_FLOAT rikinv = 1.0/rik;
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dfc = ters_dfc(i,j,k,rik);
+
+  // parameters are read once instead of on every use
+  const F_FLOAT lam3 = paramskk(i,j,k).lam3;
+  const bool cubic = (int(paramskk(i,j,k).powerm) == 3);
+
+  // exponent of the distance-difference term
+  F_FLOAT tmp;
+  if (cubic) tmp = pow(lam3 * (rij-rik),3.0);
+  else tmp = lam3 * (rij-rik);
+
+  // clamp the exponential outside +/-69.0776 instead of calling exp()
+  F_FLOAT ex_delr;
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+
+  // derivative of the exponential term
+  F_FLOAT dex_delr;
+  if (cubic)
+    dex_delr = 3.0*pow(lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = lam3 * ex_delr;
+
+  // angular term and its derivative
+  const F_FLOAT cos = vec3_dot(rij_hat,rik_hat);
+  const F_FLOAT gijk = ters_gijk(i,j,k,cos);
+  const F_FLOAT dgijk = ters_dgijk(i,j,k,cos);
+
+  // derivative of cos with respect to the position of k
+  F_FLOAT dcosfk[3];
+  vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+
+  // force on k
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Tally a pairwise energy / virial contribution.
+
+   ev            per-thread accumulator; only ev.v[] is touched here
+   i, j          the two atoms of the pair
+   epair, fpair  pair energy and force-over-distance factor
+   delx/y/z      displacement components used to build the virial
+
+   For NEIGHFLAG == FULL only atom i receives per-atom contributions
+   and the global virial is halved.
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  // per-atom views; their memory traits are selected by AtomicF<NEIGHFLAG>
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+
+  // per-atom energy: half of the pair energy to each participating atom
+  if (eflag_atom) {
+    const E_FLOAT half_e = 0.5 * epair;
+    v_eatom[i] += half_e;
+    if (NEIGHFLAG != FULL) v_eatom[j] += half_e;
+  }
+
+  // nothing more to do when no virial of any kind is requested
+  if (!vflag_either) return;
+
+  // six virial components, index order (0,0) (1,1) (2,2) (0,1) (0,2) (1,2)
+  E_FLOAT w[6];
+  w[0] = delx*delx*fpair;
+  w[1] = dely*dely*fpair;
+  w[2] = delz*delz*fpair;
+  w[3] = delx*dely*fpair;
+  w[4] = delx*delz*fpair;
+  w[5] = dely*delz*fpair;
+
+  if (vflag_global) {
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) ev.v[n] += w[n];
+    } else {
+      for (int n = 0; n < 6; n++) ev.v[n] += 0.5*w[n];
+    }
+  }
+
+  if (vflag_atom) {
+    for (int n = 0; n < 6; n++) v_vatom(i,n) += 0.5*w[n];
+
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) v_vatom(j,n) += 0.5*w[n];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Tally the virial of a three-body term.
+
+   ev          per-thread accumulator; ev.v[] is updated if vflag_global
+   i, j, k     the three atoms of the triplet
+   fj, fk      force 3-vectors on j and k
+   drij, drik  displacement 3-vectors paired with fj and fk
+
+   Each atom receives THIRD of the triplet virial; for
+   NEIGHFLAG == FULL only atom i is updated.
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+	F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
+{
+  // per-atom virial view; its memory traits are selected by AtomicF<NEIGHFLAG>
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+
+  // one third of the triplet virial, index order (0,0) (1,1) (2,2) (0,1) (0,2) (1,2)
+  F_FLOAT share[6];
+  share[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]);
+  share[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]);
+  share[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]);
+  share[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]);
+  share[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]);
+  share[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]);
+
+  if (vflag_global) {
+    for (int n = 0; n < 6; n++) ev.v[n] += share[n];
+  }
+
+  if (vflag_atom) {
+    for (int n = 0; n < 6; n++) v_vatom(i,n) += share[n];
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++) v_vatom(j,n) += share[n];
+      for (int n = 0; n < 6; n++) v_vatom(k,n) += share[n];
+    }
+  }
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Tally THIRD of a three-body virial onto atom i only, writing through
+   d_vatom directly.
+
+   ev          per-thread accumulator; ev.v[] is updated if vflag_global
+   i           atom receiving the per-atom contribution
+   j, k        accepted for signature symmetry; not read here
+   fj, fk      force 3-vectors
+   drji, drjk  displacement 3-vectors paired with fj and fk
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
+{
+  // index order (0,0) (1,1) (2,2) (0,1) (0,2) (1,2)
+  F_FLOAT share[6];
+  share[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
+  share[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
+  share[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
+  share[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
+  share[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
+  share[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
+
+  if (vflag_global) {
+    for (int n = 0; n < 6; n++) ev.v[n] += share[n];
+  }
+
+  if (vflag_atom) {
+    for (int n = 0; n < 6; n++) d_vatom(i,n) += share[n];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Extract the two-bit field stored at bit position SBBITS of a
+// neighbor-list entry.
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION 
+int PairTersoffKokkos<DeviceType>::sbmask(const int& j) const {
+  const int shifted = j >> SBBITS;
+  return shifted & 3;
+}
+
+// Explicit instantiation for the device type.
+template class PairTersoffKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+// The host type is instantiated separately only when KOKKOS_HAVE_CUDA is
+// defined (presumably LMPHostType and LMPDeviceType coincide otherwise --
+// TODO confirm).
+template class PairTersoffKokkos<LMPHostType>;
+#endif
diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h
new file mode 100755
index 0000000000..ea02e374be
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_kokkos.h
@@ -0,0 +1,220 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+// Style-name registrations, seen only when PAIR_CLASS is defined:
+// "tersoff/kk" and "tersoff/kk/device" use the device instantiation,
+// "tersoff/kk/host" uses the host instantiation.
+PairStyle(tersoff/kk,PairTersoffKokkos<LMPDeviceType>)
+PairStyle(tersoff/kk/device,PairTersoffKokkos<LMPDeviceType>)
+PairStyle(tersoff/kk/host,PairTersoffKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_KOKKOS_H
+#define LMP_PAIR_TERSOFF_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_tersoff.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+// Empty tag types. Each one selects a pair of operator() overloads of
+// PairTersoffKokkos; NEIGHFLAG and EVFLAG are carried as compile-time
+// template arguments.
+
+// tag for the operator() overloads taking TagPairTersoffComputeHalf
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffComputeHalf{};
+
+// tag for the operator() overloads taking TagPairTersoffComputeFullA
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffComputeFullA{};
+
+// tag for the operator() overloads taking TagPairTersoffComputeFullB
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffComputeFullB{};
+
+// Kokkos variant of PairTersoff, templated on the device type it runs on.
+// The object itself serves as the functor: the tagged operator() overloads
+// below are the kernel entry points.
+template<class DeviceType>
+class PairTersoffKokkos : public PairTersoff {
+ public:
+  enum {EnabledNeighFlags=FULL};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+
+  PairTersoffKokkos(class LAMMPS *);
+  virtual ~PairTersoffKokkos();
+  virtual void compute(int, int);
+  void init_style();
+
+  // Kernel entry points. For every tag there is an overload taking an
+  // EV_FLOAT& accumulator and one without it.
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  // Scalar helpers; i,j,k select the parameter set.
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double bondorder(const int &i, const int &j, const int &k,
+              const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+              const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+
+  // Three-body force helpers. ters_dthb fills fi, fj and fk;
+  // ters_dthbj fills fj and fk; ters_dthbk fills fk only.
+
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+              const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+              const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+              F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+              const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+              const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+              F_FLOAT *fj, F_FLOAT *fk) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+              const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+              const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+              F_FLOAT *fk) const;
+
+  // Small 3-vector helpers. All array arguments use F_FLOAT, matching the
+  // F_FLOAT[3] arrays the callers pass, so they also compile when F_FLOAT
+  // is not double.
+
+  // returns x . y
+  KOKKOS_INLINE_FUNCTION
+  double vec3_dot(const F_FLOAT x[3], const F_FLOAT y[3]) const {
+    return x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
+  }
+
+  // z = x + y
+  KOKKOS_INLINE_FUNCTION
+  void vec3_add(const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2];
+  }
+
+  // y = k*x (x and y may be the same array)
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scale(const F_FLOAT k, const F_FLOAT x[3], F_FLOAT y[3]) const {
+    y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2];
+  }
+
+  // z = k*x + y (z may alias x or y)
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scaleadd(const F_FLOAT k, const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int sbmask(const int& j) const;
+
+  // One parameter set; both constructors zero every field
+  // (the int argument of the second constructor is ignored).
+  struct params_ters{
+    params_ters(){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0;
+                  bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;};
+    params_ters(int i){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0;
+                  bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;};
+    F_FLOAT powerm, gamma, lam3, c, d, h, powern, beta, lam2, bigb, bigr, 
+            bigd, lam1, biga, cutsq, c1, c2, c3, c4;
+  };
+
+  // Energy / virial tally helpers.
+
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+                F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+                F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const;
+
+  void allocate();
+  void setup();
+
+ protected:
+  void cleanup_copy();
+
+  // parameter table as a DualView, plus the const device view of it
+  typedef Kokkos::DualView<int***,DeviceType> tdual_int_3d;
+  Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_ters***,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const paramskk;
+  // hardwired to space for 15 atom types
+  //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  // per-atom data views
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_tagint_1d tag;
+
+  // per-atom energy / virial: dual views and their device views
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+
+  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
+  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+
+  // neighbor-list views
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+  //NeighListKokkos<DeviceType> k_list;
+
+  class AtomKokkos *atomKK;
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  friend void pair_virial_fdotr_compute<PairTersoffKokkos>(PairTersoffKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
new file mode 100755
index 0000000000..ff84e2d392
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
@@ -0,0 +1,1208 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Ray Shan (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_tersoff_mod_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "neigh_list_kokkos.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+// Construct on top of PairTersoffMOD and record which atom data the
+// kernels read and modify.
+template<class DeviceType>
+PairTersoffMODKokkos<DeviceType>::PairTersoffMODKokkos(LAMMPS *lmp) : PairTersoffMOD(lmp)
+{
+  // Kokkos-side handles
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  // everything that is modified is also read, plus positions and types
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
+
+  respa_enable = 0;
+
+  // weight used by the three-body virial tallies
+  THIRD = 1.0/3.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Release the per-atom energy/virial arrays, except while copymode is set
+// (compute() sets it around the kernel launches).
+template<class DeviceType>
+PairTersoffMODKokkos<DeviceType>::~PairTersoffMODKokkos()
+{
+  if (copymode) return;
+
+  memory->destroy_kokkos(k_eatom,eatom);
+  memory->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run the parent allocation, then create the (ntypes+1)^3 parameter
+// DualView and cache its device view.
+template<class DeviceType>
+void PairTersoffMODKokkos<DeviceType>::allocate()
+{
+  PairTersoffMOD::allocate();
+
+  // one extra slot per dimension: setup() indexes this table from 1
+  const int extent = atom->ntypes + 1;
+
+  typedef Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType> params_dualview;
+  k_params = params_dualview("PairTersoffMOD::paramskk",extent,extent,extent);
+  paramskk = k_params.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+// Let the parent create its neighbor request, then adjust that request
+// for Kokkos: choose host or device residency and ask for a full list.
+// Any neighflag other than FULL, HALF or HALFTHREAD is a fatal error.
+template<class DeviceType>
+void PairTersoffMODKokkos<DeviceType>::init_style()
+{
+  PairTersoffMOD::init_style();
+
+  // irequest = neigh request made by parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;
+
+  // host list only when DeviceType is the host type and differs from the device type
+  neighbor->requests[irequest]->
+    kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
+    !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  neighbor->requests[irequest]->
+    kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+
+  if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
+    // a full list is requested in all three cases
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full_cluster = 0;
+    // ghost-atom neighbors are only requested for FULL
+    if (neighflag == FULL)
+      neighbor->requests[irequest]->ghost = 1;
+    else
+      neighbor->requests[irequest]->ghost = 0;
+  } else {
+    // name this style, not the plain tersoff/kk style
+    error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/mod/kk");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run the parent setup, then copy the parameters into the host side of
+// k_params for every type triplet and mark the host copy as modified.
+template<class DeviceType>
+void PairTersoffMODKokkos<DeviceType>::setup()
+{
+  PairTersoffMOD::setup();
+
+  const int ntypes = atom->ntypes;
+
+  for (int it = 1; it <= ntypes; it++) {
+    for (int jt = 1; jt <= ntypes; jt++) {
+      for (int kt = 1; kt <= ntypes; kt++) {
+        // NOTE(review): elem2param is indexed with type-1, i.e. this assumes
+        // atom type t corresponds to element t-1 -- confirm against the
+        // element mapping set up by the parent class.
+        const int m = elem2param[it-1][jt-1][kt-1];
+        params_ters &dst = k_params.h_view(it,jt,kt);
+
+        dst.powerm = params[m].powerm;
+        dst.lam3 = params[m].lam3;
+        dst.h = params[m].h;
+        dst.powern = params[m].powern;
+        dst.beta = params[m].beta;
+        dst.lam2 = params[m].lam2;
+        dst.bigb = params[m].bigb;
+        dst.bigr = params[m].bigr;
+        dst.bigd = params[m].bigd;
+        dst.lam1 = params[m].lam1;
+        dst.biga = params[m].biga;
+        dst.cutsq = params[m].cutsq;
+        dst.c1 = params[m].c1;
+        dst.c2 = params[m].c2;
+        dst.c3 = params[m].c3;
+        dst.c4 = params[m].c4;
+        dst.c5 = params[m].c5;
+        dst.ca1 = params[m].ca1;
+        dst.ca4 = params[m].ca4;
+        dst.powern_del = params[m].powern_del;
+      }
+    }
+  }
+
+  // host copy is now newer; compute() syncs it to the device
+  k_params.template modify<LMPHostType>();
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Host-side driver for one force evaluation.
+//   eflag_in, vflag_in : energy / virial request flags, stored in eflag / vflag
+// Steps: set up tally flags, (re)create per-atom arrays, sync data, launch
+// the kernel(s) selected by neighflag, then fold the reduced energy and
+// virial into eng_vdwl / virial and sync per-atom results back to the host.
+template<class DeviceType>
+void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  // set before ev_setup() is called below
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+
+  // bring atom data and the parameter table up to date in this execution
+  // space, and flag what the kernels are going to change
+  atomKK->sync(execution_space,datamask_read);
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  // views and scalars read by the kernels
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+
+  // ignum = inum + gnum is the range used by the second FULL pass
+  const int inum = list->inum;
+  const int ignum = inum + list->gnum;
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+
+  k_list->clean_copy();
+  // *this is handed to Kokkos as the functor below; while copymode is set
+  // the destructor does not free the per-atom arrays
+  copymode = 1;
+
+  // ev receives each reduction result, ev_all sums them
+  EV_FLOAT ev;
+  EV_FLOAT ev_all;
+
+  // with evflag a reduction collects energy/virial, otherwise a plain
+  // parallel_for is launched
+  if (neighflag == HALF) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALF,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALF,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == HALFTHREAD) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == FULL) {
+    // first pass over [0,inum)
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+    
+    // second pass over [0,ignum)
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,1> >(0,ignum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,0> >(0,ignum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  }
+
+  // fold the reduced totals into the global accumulators
+  if (eflag_global) eng_vdwl += ev_all.evdwl;
+  if (vflag_global) {
+    virial[0] += ev_all.v[0];
+    virial[1] += ev_all.v[1];
+    virial[2] += ev_all.v[2];
+    virial[3] += ev_all.v[3];
+    virial[4] += ev_all.v[4];
+    virial[5] += ev_all.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  // per-atom results were written on the device: mark and copy to host
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel for the HALF / HALFTHREAD neighbor flags, one call per entry ii
+// of d_ilist. Forces are applied to i and to its neighbors j (and k).
+//   ii : index into d_ilist
+//   ev : energy/virial accumulator, written only on the EVFLAG paths
+//        (v_tally3 additionally receives it when vflag_atom is set)
+// Three stages per atom i: repulsive pair term, bond order + attractive
+// pair term, and the three-body forces.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // The f array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  const int i = d_ilist[ii];
+  // only atoms with index below nlocal are processed as the central atom
+  if (i >= nlocal) return;
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+  const int itag = tag(i);
+
+  int j,k,jj,kk,jtag,jtype,ktype;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+    jtag = tag(j);
+
+    // skip rule based on tag ordering and the parity of itag+jtag, with a
+    // coordinate comparison when the tags are equal (appears intended to
+    // visit each i-j pair from only one of its two atoms -- TODO confirm)
+    if (itag > jtag) {
+      if ((itag+jtag) % 2 == 0) continue;
+    } else if (itag < jtag) {
+      if ((itag+jtag) % 2 == 1) continue;
+    } else {
+      if (x(j,2)  < ztmp) continue;
+      if (x(j,2) == ztmp && x(j,1)  < ytmp) continue;
+      if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
+    }
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    // frep is already divided by r, so it multiplies the displacement directly
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * 
+	    		  (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
+    const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+
+    // equal and opposite force on i and j
+    a_f(i,0) += delx*frep;
+    a_f(i,1) += dely*frep;
+    a_f(i,2) += delz*frep;
+    a_f(j,0) -= delx*frep;
+    a_f(j,1) -= dely*frep;
+    a_f(j,2) -= delz*frep;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+ 
+  // this loop visits every neighbor j (no skip rule); the pair terms
+  // below carry a factor 0.5
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+    
+    // sum bondorder() over all other neighbors k of i within the cutoff
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    // prefactor scales the three-body forces computed further down
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    a_f(i,0) += delx1*fatt;
+    a_f(i,1) += dely1*fatt;
+    a_f(i,2) += delz1*fatt;
+    a_f(j,0) -= delx1*fatt;
+    a_f(j,1) -= dely1*fatt;
+    a_f(j,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+    
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+		rik,delx2,dely2,delz2,fi,fj,fk);
+
+      // apply the triplet forces to all three atoms
+      a_f(i,0) += fi[0];
+      a_f(i,1) += fi[1];
+      a_f(i,2) += fi[2];
+      a_f(j,0) += fj[0];
+      a_f(j,1) += fj[1];
+      a_f(j,2) += fj[2];
+      a_f(k,0) += fk[0];
+      a_f(k,1) += fk[1];
+      a_f(k,2) += fk[2];
+
+      // NOTE(review): the three-body virial is tallied only when vflag_atom
+      // is set, and this block is not guarded by EVFLAG -- confirm intended
+      if (vflag_atom) {
+	F_FLOAT delrij[3], delrik[3];
+	// v_tally3 receives the displacements with the opposite sign
+	delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+	delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+	if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+// Non-reducing entry point: forwards to the reducing overload with a
+// local accumulator whose contents are dropped on return.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  typedef TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG> tag_type;
+
+  EV_FLOAT discarded;
+  this->template operator()<NEIGHFLAG,EVFLAG>(tag_type(), ii, discarded);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+  // Full-list pass A for list entry ii: atom i is the central atom; only f(i,*) is written here.
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive: pair term biga*exp(-lam1*r)*fc(r) for each neighbor j within cutsq
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    const int jtype = type(j);  // loop-local; shadows the function-scope jtype
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * 
+	    		  (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
+    const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+    // only atom i is updated; no matching force is applied to j in this kernel
+    f(i,0) += delx*frep;
+    f(i,1) += dely*frep;
+    f(i,2) += delz*frep;
+
+    if (EVFLAG) {
+      if (eflag)
+        ev.evdwl += 0.5*eng;  // half of the pair energy is added per visit
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+  // bo_ij = sum of bondorder(i,j,k) over neighbors k of i within cutoff, skipping list index kk == jj
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+    
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    f(i,0) += delx1*fatt;
+    f(i,1) += dely1*fatt;
+    f(i,2) += delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += 0.5*eng;
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+    // ters_dthb fills fi, fj, fk; only fi is added to f(i), fj and fk feed the virial tally
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+		rik,delx2,dely2,delz2,fi,fj,fk);
+
+      f(i,0) += fi[0];
+      f(i,1) += fi[1];
+      f(i,2) += fi[2];
+      // NOTE(review): tally runs only when vflag_atom is set, so v_tally3's vflag_global branch is skipped otherwise - confirm intended
+      if (vflag_atom) {
+	F_FLOAT delrij[3], delrik[3];
+	delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+	delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+	if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeFullA<NEIGHFLAG,EVFLAG> tag, const int &ii) const {
+  EV_FLOAT ev_scratch;  // tallies land here and are dropped when this overload returns
+  (*this).template operator()<NEIGHFLAG,EVFLAG>(tag, ii, ev_scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+  // Full-list pass B for list entry ii: adds to f(i,*) the terms in which i is a neighbor of a central atom j.
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype,j_jnum;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  const int jnum = d_numneigh[i];
+
+  // attractive: bond order
+  // j takes the central-atom role below: parameters are indexed (jtype,itype,...) and j's own neighbor list is walked
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    if (j >= nlocal) continue;  // NOTE(review): presumably because only local atoms have a neighbor row to walk - confirm
+    jtype = type(j);
+
+    delx1 = x(j,0) - xtmp;  // j minus i: opposite sign convention to pass A
+    dely1 = x(j,1) - ytmp;
+    delz1 = x(j,2) - ztmp;
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(jtype,itype,itype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    j_jnum = d_numneigh[j];
+    // bo_ij: sum of bondorder(j; i, k) over j's neighbors k other than i
+    for (kk = 0; kk < j_jnum; kk++) {
+      k = d_neighbors(j,kk);
+      if (k == i) continue;  // compared before NEIGHMASK is applied to k
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
+    const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
+    const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    f(i,0) -= delx1*fatt;  // subtracted because delx1 here is x(j) - x(i)
+    f(i,1) -= dely1*fatt;
+    f(i,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) 
+        ev.evdwl += 0.5 * eng;
+      if (vflag_either || eflag_atom) 
+	this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+    // two contributions to f(i) per k: fj from ters_dthbj (i in the j slot) and fk from ters_dthbk (i in the k slot)
+    for (kk = 0; kk < j_jnum; kk++) {
+      k = d_neighbors(j,kk);
+      if (k == i) continue;
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
+		rik,delx2,dely2,delz2,fj,fk);
+      f(i,0) += fj[0];
+      f(i,1) += fj[1];
+      f(i,2) += fj[2];
+
+      if (vflag_atom) {
+	F_FLOAT delrji[3], delrjk[3];
+	delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
+	delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
+	if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
+      }
+      // NOTE(review): prefactor_jk evaluates ters_dbij at bo_ij (accumulated for the j-i bond above); no virial tally follows this term - confirm both
+      const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
+      const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
+      ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
+		rij,delx1,dely1,delz1,fk);
+      f(i,0) += fk[0];
+      f(i,1) += fk[1];
+      f(i,2) += fk[2];
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeFullB<NEIGHFLAG,EVFLAG> tag, const int &ii) const {
+  EV_FLOAT ev_scratch;  // tallies land here and are dropped when this overload returns
+  (*this).template operator()<NEIGHFLAG,EVFLAG>(tag, ii, ev_scratch);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  // Cutoff function: 1 below bigr-bigd, 0 above bigr+bigd, two-sine blend between.
+  const F_FLOAT center = paramskk(i,j,k).bigr, halfwidth = paramskk(i,j,k).bigd;
+  if (r < center-halfwidth) return 1.0;
+  else if (r > center+halfwidth) return 0.0;
+  const double s1 = sin(MY_PI2*(r - center)/halfwidth);
+  const double s3 = sin(3.0*MY_PI2*(r - center)/halfwidth);
+  return 0.5*(1.0 - 1.125*s1 - 0.125*s3);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  // Radial derivative of ters_fc_k; zero on both sides of the switching window.
+  const F_FLOAT center = paramskk(i,j,k).bigr, halfwidth = paramskk(i,j,k).bigd;
+  const bool outside = (r < center-halfwidth) || (r > center+halfwidth);
+  if (outside) return 0.0;
+  const double c1 = cos(MY_PI2*(r - center)/halfwidth);
+  const double c3 = cos(3.0*MY_PI2*(r - center)/halfwidth);
+  return -(0.375*MY_PI4/halfwidth) * (3.0*c1 + c3);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const 
+{
+  // One term of the bond-order sum: fc(rik) * g(cos theta) * exponential factor.
+  const F_FLOAT cos_ijk = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
+  // Exponent is lam3*(rij-rik), cubed when powerm truncates to 3.
+  F_FLOAT expo = paramskk(i,j,k).lam3 * (rij-rik);
+  if (int(paramskk(i,j,k).powerm) == 3) expo = pow(expo,3.0);
+
+  // Outside +/-69.0776 the exponential is replaced by 0 or 1.e30.
+  F_FLOAT damp;
+  if (expo < -69.0776) damp = 0.0;
+  else if (expo > 69.0776) damp = 1.e30;
+  else damp = exp(expo);
+  return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,cos_ijk) * damp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::
+	ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  // Angular function of the cosine passed in:
+  //   g = c1 + c2*d/(c3 + d) * (1 + c4*exp(-c5*d)),  d = (h - cos)^2
+  const F_FLOAT c1 = paramskk(i,j,k).c1, c2 = paramskk(i,j,k).c2;
+  const F_FLOAT c3 = paramskk(i,j,k).c3, c4 = paramskk(i,j,k).c4, c5 = paramskk(i,j,k).c5;
+  const F_FLOAT h = paramskk(i,j,k).h;
+  const F_FLOAT d = (h - cos)*(h - cos);
+
+  const F_FLOAT ratio = c2*d/(c3 + d);
+  const double envelope = 1.0 + c4*exp(-c5*d);
+  return c1 + ratio * envelope;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::
+	ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  // Derivative of the ters_gijk angular function with respect to the cosine.
+  const F_FLOAT c2 = paramskk(i,j,k).c2, c3 = paramskk(i,j,k).c3;
+  const F_FLOAT c4 = paramskk(i,j,k).c4, c5 = paramskk(i,j,k).c5;
+  const F_FLOAT h = paramskk(i,j,k).h;
+  const F_FLOAT d = (h - cos)*(h - cos);
+  const F_FLOAT u = (h - cos)/(c3 + d);
+  const F_FLOAT e = exp(-c5*d);
+
+  return -2.0*c2*u*((1 + c4*e)*(1 + u*(cos - h)) - 
+                    d*c4*c5*e);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  // Attractive pair term -bigb*exp(-lam2*r)*fc(r); zero beyond bigr+bigd.
+  const F_FLOAT r_outer = paramskk(i,j,k).bigr + paramskk(i,j,k).bigd;
+  return (r > r_outer) ? 0.0 : -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * ters_fc_k(i,j,k,r);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const 
+{
+  const F_FLOAT lam2 = paramskk(i,j,k).lam2;  // radial derivative of ters_fa_k; zero beyond bigr+bigd
+  if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
+  return paramskk(i,j,k).bigb * exp(-lam2 * r) * (lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r));
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &bo) const 
+{
+  // Bond-order function of beta*bo, with shortcut branches above ca1 and below ca4.
+  const F_FLOAT scaled = paramskk(i,j,k).beta * bo;
+  const F_FLOAT n = paramskk(i,j,k).powern, del = paramskk(i,j,k).powern_del;
+  if (scaled > paramskk(i,j,k).ca1) return pow(scaled, -n/(2.0*del));
+  else if (scaled < paramskk(i,j,k).ca4) return 1.0;
+  return pow(1.0 + pow(scaled,n), -1.0/(2.0*del));
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffMODKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
+		const int &k, const F_FLOAT &bo) const 
+{
+  // Derivative of ters_bij_k with respect to bo, using the same two thresholds.
+  const F_FLOAT scaled = paramskk(i,j,k).beta * bo;
+  const F_FLOAT n = paramskk(i,j,k).powern, del = paramskk(i,j,k).powern_del;
+  const F_FLOAT ratio = n/del;
+
+  if (scaled > paramskk(i,j,k).ca1) 
+    return -0.5*ratio*pow(scaled,-0.5*ratio) / bo;
+  if (scaled < paramskk(i,j,k).ca4) return 0.0;
+  const F_FLOAT scaled_n = pow(scaled,n);
+  return -0.5 *ratio* pow(1.0+scaled_n, -1.0-(1.0/(2.0*del)))*scaled_n / bo;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::ters_dthb(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
+{  // fills fi, fj, fk (3 components each) for one (i;j,k) triplet, each scaled by prefactor
+  // from PairTersoffMOD::attractive
+  F_FLOAT rij_hat[3],rik_hat[3];
+  F_FLOAT rijinv,rikinv;
+  F_FLOAT delrij[3], delrik[3];
+
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+  // unit vectors: (dx1,dy1,dz1)/rij and (dx2,dy2,dz2)/rik
+  //rij = sqrt(rsq1);
+  rijinv = 1.0/rij;
+  vec3_scale(rijinv,delrij,rij_hat);
+
+  //rik = sqrt(rsq2);
+  rikinv = 1.0/rik;
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  // from PairTersoffMOD::ters_zetaterm_d
+  F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
+  F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
+  // cutoff and its derivative are evaluated at rik
+  fc = ters_fc_k(i,j,k,rik);
+  dfc = ters_dfc(i,j,k,rik);
+  if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
+  else tmp = paramskk(i,j,k).lam3 * (rij-rik);
+  // same clamping of the exponential as in bondorder()
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+  // dex_delr: ex_delr times the derivative of its exponent with respect to rij
+  if (int(paramskk(i,j,k).powerm) == 3)
+    dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
+
+  cos = vec3_dot(rij_hat,rik_hat);  // local variable named cos; shadows the math function in this scope
+  gijk = ters_gijk(i,j,k,cos);
+  dgijk = ters_dgijk(i,j,k,cos);
+
+  // from PairTersoffMOD::costheta_d
+  vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
+  vec3_scale(rijinv,dcosfj,dcosfj);
+  vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+  vec3_add(dcosfj,dcosfk,dcosfi);
+  vec3_scale(-1.0,dcosfi,dcosfi);
+  // fi: cutoff-derivative, angular and exponential terms assembled for the i slot
+  vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi);
+  vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi);
+  vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi);
+  vec3_scale(prefactor,fi,fi);
+  // fj: angular and exponential terms only (fc depends on rik, not rij)
+  vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
+  vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+  // fk: cutoff-derivative, angular and exponential terms for the k slot
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::ters_dthbj(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fj, F_FLOAT *fk) const
+{
+  // Fills fj and fk with the same expressions ters_dthb uses, scaled by prefactor.
+  // The center-atom cos(theta) derivative enters neither vector, so it is not formed.
+  F_FLOAT rij_hat[3],rik_hat[3];
+  F_FLOAT rijinv,rikinv;
+  F_FLOAT delrij[3], delrik[3];
+
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+
+  rijinv = 1.0/rij;
+  vec3_scale(rijinv,delrij,rij_hat);
+
+  rikinv = 1.0/rik;
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,costheta,tmp;
+  F_FLOAT dcosfj[3],dcosfk[3];
+
+  fc = ters_fc_k(i,j,k,rik);
+  dfc = ters_dfc(i,j,k,rik);
+  if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
+  else tmp = paramskk(i,j,k).lam3 * (rij-rik);
+
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+
+  if (int(paramskk(i,j,k).powerm) == 3)
+    dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
+
+  costheta = vec3_dot(rij_hat,rik_hat);
+  gijk = ters_gijk(i,j,k,costheta);
+  dgijk = ters_dgijk(i,j,k,costheta);
+
+  // cos(theta) derivative vectors for the j and k slots
+  vec3_scaleadd(-costheta,rij_hat,rik_hat,dcosfj);
+  vec3_scale(rijinv,dcosfj,dcosfj);
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+
+  vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
+  vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::ters_dthbk(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fk) const
+{
+  // Fills fk with the same expression ters_dthb uses for its fk output, scaled
+  // by prefactor.  The cos(theta) derivative vectors for the center atom and
+  // for the j slot do not enter fk, so they are not formed here.
+  F_FLOAT rij_hat[3],rik_hat[3];
+  F_FLOAT rijinv,rikinv;
+  F_FLOAT delrij[3], delrik[3];
+
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+
+  rijinv = 1.0/rij;
+  vec3_scale(rijinv,delrij,rij_hat);
+
+  rikinv = 1.0/rik;
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,costheta,tmp;
+  F_FLOAT dcosfk[3];
+
+  fc = ters_fc_k(i,j,k,rik);
+  dfc = ters_dfc(i,j,k,rik);
+  if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
+  else tmp = paramskk(i,j,k).lam3 * (rij-rik);
+
+  // same clamping of the exponential as in bondorder()
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+
+  if (int(paramskk(i,j,k).powerm) == 3)
+    dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
+
+  costheta = vec3_dot(rij_hat,rik_hat);
+  gijk = ters_gijk(i,j,k,costheta);
+  dgijk = ters_dgijk(i,j,k,costheta);
+
+  // cos(theta) derivative vector for the k slot
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  // Tally one pair interaction into the per-atom energy and the virial accumulators.
+  //
+  //   ev     accumulator passed in by the kernel; only ev.v[0..5] is touched here
+  //   i, j   atom indices of the pair
+  //   epair  pair energy; half of it goes to eatom[i] (and to eatom[j] unless
+  //          NEIGHFLAG == FULL) when eflag_atom is set
+  //   fpair  scalar multiplying the products of the (delx,dely,delz) components
+  //
+  // Virial components are ordered (0,0),(1,1),(2,2),(0,1),(0,2),(1,2) in the
+  // components of (delx,dely,delz).
+
+  // The eatom and vatom views take their memory traits from AtomicF<NEIGHFLAG>.
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > eatom_view = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom_view = k_vatom.view<DeviceType>();
+
+  if (eflag_atom) {
+    const E_FLOAT half_e = 0.5 * epair;
+    eatom_view[i] += half_e;
+    if (NEIGHFLAG != FULL) eatom_view[j] += half_e;
+  }
+
+  if (!vflag_either) return;
+
+  const F_FLOAT d[3] = {delx, dely, delz};
+  const int a_of[6] = {0, 1, 2, 0, 0, 1};
+  const int b_of[6] = {0, 1, 2, 1, 2, 2};
+
+  E_FLOAT w[6];
+  for (int c = 0; c < 6; c++) {
+    w[c] = d[a_of[c]]*d[b_of[c]]*fpair;
+  }
+
+  if (vflag_global) {
+    // with a full list only half of each component goes to the global sum
+    if (NEIGHFLAG != FULL) {
+      for (int c = 0; c < 6; c++) {
+        ev.v[c] += w[c];
+      }
+    } else {
+      for (int c = 0; c < 6; c++) {
+        ev.v[c] += 0.5*w[c];
+      }
+    }
+  }
+
+  if (vflag_atom) {
+    // atom i gets half; atom j gets the other half unless the list is full
+    for (int c = 0; c < 6; c++) {
+      vatom_view(i,c) += 0.5*w[c];
+    }
+    if (NEIGHFLAG != FULL) {
+      for (int c = 0; c < 6; c++) {
+        vatom_view(j,c) += 0.5*w[c];
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+	F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
+{
+  // Three-body virial tally.  Component c is
+  //   THIRD * (drij[a]*fj[b] + drik[a]*fk[b])
+  // with (a,b) running over (0,0),(1,1),(2,2),(0,1),(0,2),(1,2).  The value is
+  // added to ev.v when vflag_global is set.  When vflag_atom is set it is also
+  // added to the per-atom virial row of i and, unless NEIGHFLAG == FULL, to the
+  // rows of j and k.
+
+  // Per-atom virial view; its memory traits are selected by AtomicF<NEIGHFLAG>.
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom_view = k_vatom.view<DeviceType>();
+
+  const int a_of[6] = {0, 1, 2, 0, 0, 1};
+  const int b_of[6] = {0, 1, 2, 1, 2, 2};
+
+  F_FLOAT w[6];
+  for (int c = 0; c < 6; c++) {
+    const int a = a_of[c], b = b_of[c];
+    w[c] = THIRD * (drij[a]*fj[b] + drik[a]*fk[b]);
+  }
+
+  if (vflag_global) {
+    for (int c = 0; c < 6; c++) ev.v[c] += w[c];
+  }
+
+  if (!vflag_atom) return;
+
+  // atom i always receives the tally
+  for (int c = 0; c < 6; c++) vatom_view(i,c) += w[c];
+
+  if (NEIGHFLAG != FULL) {
+    // non-full lists: the same values also go to both neighbors
+    for (int c = 0; c < 6; c++) vatom_view(j,c) += w[c];
+    for (int c = 0; c < 6; c++) vatom_view(k,c) += w[c];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffMODKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
+{
+  // Three-body virial tally called from the full-list pass-B kernel.  Component c is
+  //   THIRD * (drji[a]*fj[b] + drjk[a]*fk[b])
+  // with (a,b) running over (0,0),(1,1),(2,2),(0,1),(0,2),(1,2).  It is added to
+  // ev.v when vflag_global is set and to row i of d_vatom when vflag_atom is
+  // set; the j and k arguments are not used.
+
+  const int a_of[6] = {0, 1, 2, 0, 0, 1};
+  const int b_of[6] = {0, 1, 2, 1, 2, 2};
+
+  F_FLOAT w[6];
+  for (int c = 0; c < 6; c++) {
+    const int a = a_of[c], b = b_of[c];
+    w[c] = THIRD * (drji[a]*fj[b] + drjk[a]*fk[b]);
+  }
+
+  if (vflag_global) {
+    for (int c = 0; c < 6; c++) ev.v[c] += w[c];
+  }
+
+  if (vflag_atom) {
+    for (int c = 0; c < 6; c++) d_vatom(i,c) += w[c];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION 
+int PairTersoffMODKokkos<DeviceType>::sbmask(const int& j) const {
+  return (j >> SBBITS) & 3;  // the two bits of j starting at bit position SBBITS
+}
+
+template class PairTersoffMODKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#ifdef KOKKOS_HAVE_CUDA
+template class PairTersoffMODKokkos<LMPHostType>;  // NOTE(review): presumably LMPHostType is a distinct type only in CUDA builds - confirm
+#endif
diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.h b/src/KOKKOS/pair_tersoff_mod_kokkos.h
new file mode 100755
index 0000000000..e6b66a4b22
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h
@@ -0,0 +1,220 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// Style names: /kk and /kk/device map to the LMPDeviceType instantiation, /kk/host to LMPHostType.
+PairStyle(tersoff/mod/kk,PairTersoffMODKokkos<LMPDeviceType>)
+PairStyle(tersoff/mod/kk/device,PairTersoffMODKokkos<LMPDeviceType>)
+PairStyle(tersoff/mod/kk/host,PairTersoffMODKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_MOD_KOKKOS_H
+#define LMP_PAIR_TERSOFF_MOD_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_tersoff_mod.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffMODComputeHalf{};  // empty tag: selects the half-list overloads of operator()
+// Empty tag: selects the full-list pass-A overloads of operator().
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffMODComputeFullA{};
+// Empty tag: selects the full-list pass-B overloads of operator().
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffMODComputeFullB{};
+
+template<class DeviceType>
+class PairTersoffMODKokkos : public PairTersoffMOD {
+ public:
+  enum {EnabledNeighFlags=FULL};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+  // Construction, destruction and the host-side entry points.
+  PairTersoffMODKokkos(class LAMMPS *);
+  virtual ~PairTersoffMODKokkos();
+  virtual void compute(int, int);
+  void init_style();
+  // Kernel functors.  Each tag has an overload taking an EV_FLOAT accumulator and one without it.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+  // Half kernel without accumulator: forwards to the overload above with a local EV_FLOAT.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
+  // Full-list pass A: the list atom is the central atom and only its own force row is written.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+  // Pass A without accumulator: forwards with a local EV_FLOAT.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
+  // Full-list pass B: adds the terms in which the list atom is a neighbor of a local central atom.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+  // Pass B without accumulator: forwards with a local EV_FLOAT.
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffMODComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
+  // Cutoff function of r for the parameter set (i,j,k): 1 below bigr-bigd, 0 above bigr+bigd.
+  KOKKOS_INLINE_FUNCTION
+  double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+  // Radial derivative of ters_fc_k.
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+  // Attractive pair term -bigb*exp(-lam2*r)*fc(r).
+  KOKKOS_INLINE_FUNCTION
+  double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+  // Radial derivative of ters_fa_k.
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+  // Bond-order function of the accumulated sum bo.
+  KOKKOS_INLINE_FUNCTION
+  double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+  // Derivative of ters_bij_k with respect to bo.
+  KOKKOS_INLINE_FUNCTION
+  double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+  // One term of the bond-order sum for the two vectors (dx1,dy1,dz1) and (dx2,dy2,dz2) of lengths rij, rik.
+  KOKKOS_INLINE_FUNCTION
+  double bondorder(const int &i, const int &j, const int &k,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const;
+  // Angular function of the cosine between the two vectors.
+  KOKKOS_INLINE_FUNCTION
+  double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+  // Derivative of ters_gijk with respect to the cosine.
+  KOKKOS_INLINE_FUNCTION
+  double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+  // Three-body vectors fi, fj, fk (3 components each) for one triplet, scaled by prefactor.
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const;
+  // Same as ters_dthb but producing only fj and fk.
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fj, F_FLOAT *fk) const;
+  // Same as ters_dthb but producing only fk.
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fk) const;
+  // 3-vector helpers.  All array arguments are F_FLOAT so they accept the kernels' F_FLOAT arrays in any precision build.
+  KOKKOS_INLINE_FUNCTION
+  double vec3_dot(const F_FLOAT x[3], const F_FLOAT y[3]) const {
+    return x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
+  }
+  // z = x + y
+  KOKKOS_INLINE_FUNCTION
+  void vec3_add(const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2];
+  }
+  // y = k*x (x and y may be the same array)
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scale(const F_FLOAT k, const F_FLOAT x[3], F_FLOAT y[3]) const {
+    y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2];
+  }
+  // z = k*x + y (z may alias y)
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scaleadd(const F_FLOAT k, const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2];
+  }
+  // Returns the two bits of j starting at bit position SBBITS.
+  KOKKOS_INLINE_FUNCTION
+  int sbmask(const int& j) const;
+  // One parameter set; both constructors zero every field (the int argument is ignored).
+  struct params_ters{
+    params_ters(){powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0;
+	    lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;};
+    params_ters(int i){powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0;
+	    lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;};
+    F_FLOAT powerm, lam3, h, powern, beta, lam2, bigb, bigr, bigd, 
+	    lam1, biga, powern_del, cutsq, c1, c2, c3, c4, c5, ca1, ca4;
+  };
+  // Pair energy/virial tally into ev and the per-atom arrays.
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+  // Three-body virial tally; per-atom rows of i and, unless NEIGHFLAG == FULL, of j and k.
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+		F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const;
+  // Three-body virial tally that writes only the per-atom row of i (through d_vatom).
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, 
+		F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const;
+  // NOTE(review): allocate()/setup() are defined outside this view; presumably they size and fill k_params - confirm.
+  void allocate();
+  void setup();
+
+ protected:
+  void cleanup_copy();
+  // Parameter sets indexed by three type indices; paramskk is the const device view the kernels read.
+  typedef Kokkos::DualView<int***,DeviceType> tdual_int_3d;
+  Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_ters***,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const paramskk;
+  // hardwired to space for 15 atom types
+  //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  // Atom data views used by the kernels: positions, forces, types, tags.
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_tagint_1d tag;
+  // Per-atom energy and virial: dual views (k_*) and device views (d_*).
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+
+  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
+  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+  // Neighbor list data: d_neighbors(i,jj) is neighbor jj of atom i, d_numneigh[i] its count, d_ilist the list atoms.
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+  //NeighListKokkos<DeviceType> k_list;
+
+  class AtomKokkos *atomKK;
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  friend void pair_virial_fdotr_compute<PairTersoffMODKokkos>(PairTersoffMODKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
new file mode 100755
index 0000000000..12e69e6802
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
@@ -0,0 +1,1302 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Ray Shan (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_tersoff_zbl_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "neigh_list_kokkos.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairTersoffZBLKokkos<DeviceType>::PairTersoffZBLKokkos(LAMMPS *lmp) : PairTersoffZBL(lmp)
+{
+  // plain pair-style settings
+  THIRD = 1.0/3.0;
+  respa_enable = 0;
+
+  // Kokkos bookkeeping: which space we run in and which atom data we touch
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | datamask_modify;
+
+  // ZBL constants depend on the unit system; only metal and real are handled
+  const bool metal_units = (strcmp(update->unit_style,"metal") == 0);
+  const bool real_units = !metal_units && (strcmp(update->unit_style,"real") == 0);
+
+  if (!metal_units && !real_units) {
+    error->all(FLERR,"Pair tersoff/zbl/kk requires metal or real units");
+  } else {
+    global_a_0 = 0.529;
+    if (metal_units) global_epsilon_0 = 0.00552635;
+    else global_epsilon_0 = 0.00552635 * 0.043365121;
+    global_e = 1.0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairTersoffZBLKokkos<DeviceType>::~PairTersoffZBLKokkos()
+{
+  // compute() raises copymode while *this is handed to Kokkos as a functor;
+  // in that state the per-atom arrays must not be released
+  if (copymode) return;
+
+  memory->destroy_kokkos(k_eatom,eatom);
+  memory->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTersoffZBLKokkos<DeviceType>::allocate()
+{
+  PairTersoffZBL::allocate();
+
+  // the table is indexed directly by 1-based atom type, so every
+  // dimension needs ntypes+1 slots
+  const int extent = atom->ntypes + 1;
+
+  k_params = Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType>
+    ("PairTersoffZBL::paramskk",extent,extent,extent);
+  paramskk = k_params.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTersoffZBLKokkos<DeviceType>::init_style()
+{
+  PairTersoffZBL::init_style();
+
+  // the parent class has just appended its neighbor request;
+  // it is the last one in the list and is the one adjusted here
+  const int irequest = neighbor->nrequest - 1;
+  neighflag = lmp->kokkos->neighflag;
+
+  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+
+  neighbor->requests[irequest]->kokkos_host = on_host && !on_device;
+  neighbor->requests[irequest]->kokkos_device = on_device;
+
+  const bool supported =
+    (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD);
+
+  if (!supported) {
+    error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/zbl/kk");
+    return;
+  }
+
+  // every supported mode asks for a full list; only FULL also asks
+  // for the neighbors of ghost atoms
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->half = 0;
+  neighbor->requests[irequest]->full_cluster = 0;
+  neighbor->requests[irequest]->ghost = (neighflag == FULL) ? 1 : 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Copy the parameter sets built by the parent class into the Kokkos table
+// k_params, which is indexed by (itype,jtype,ktype) with 1-based atom types,
+// and mark the host copy as modified so compute() syncs it to the device.
+template<class DeviceType>
+void PairTersoffZBLKokkos<DeviceType>::setup()
+{
+  PairTersoffZBL::setup();
+
+  const int n = atom->ntypes;
+
+  for (int i = 1; i <= n; i++)
+    for (int j = 1; j <= n; j++)
+      for (int k = 1; k <= n; k++) {
+        // elem2param is indexed by element, not by atom type.  Several
+        // types may share one element (or appear in a different order),
+        // so translate each type through map[] instead of using type-1.
+        // Types that are not mapped to an element keep their
+        // default-constructed table entry.
+        if (map[i] < 0 || map[j] < 0 || map[k] < 0) continue;
+        const int m = elem2param[map[i]][map[j]][map[k]];
+
+        k_params.h_view(i,j,k).powerm = params[m].powerm;
+        k_params.h_view(i,j,k).gamma = params[m].gamma;
+        k_params.h_view(i,j,k).lam3 = params[m].lam3;
+        k_params.h_view(i,j,k).c = params[m].c;
+        k_params.h_view(i,j,k).d = params[m].d;
+        k_params.h_view(i,j,k).h = params[m].h;
+        k_params.h_view(i,j,k).powern = params[m].powern;
+        k_params.h_view(i,j,k).beta = params[m].beta;
+        k_params.h_view(i,j,k).lam2 = params[m].lam2;
+        k_params.h_view(i,j,k).bigb = params[m].bigb;
+        k_params.h_view(i,j,k).bigr = params[m].bigr;
+        k_params.h_view(i,j,k).bigd = params[m].bigd;
+        k_params.h_view(i,j,k).lam1 = params[m].lam1;
+        k_params.h_view(i,j,k).biga = params[m].biga;
+        k_params.h_view(i,j,k).cutsq = params[m].cutsq;
+        k_params.h_view(i,j,k).c1 = params[m].c1;
+        k_params.h_view(i,j,k).c2 = params[m].c2;
+        k_params.h_view(i,j,k).c3 = params[m].c3;
+        k_params.h_view(i,j,k).c4 = params[m].c4;
+        k_params.h_view(i,j,k).Z_i = params[m].Z_i;
+        k_params.h_view(i,j,k).Z_j = params[m].Z_j;
+        k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut;
+        k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale;
+      }
+
+  k_params.template modify<LMPHostType>();
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{ // one force evaluation: sync data, launch the kernel(s) chosen by neighflag, collect energy/virial
+  eflag = eflag_in;
+  vflag = vflag_in;
+  // with a FULL list the f-dot-r virial path is switched off
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+  // ev_setup() runs only when energy or virial is requested; otherwise the tally flags are cleared
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memory->destroy_kokkos(k_eatom,eatom);
+    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.d_view;
+  }
+  if (vflag_atom) {
+    memory->destroy_kokkos(k_vatom,vatom);
+    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.d_view;
+  }
+  // bring inputs to the execution space and flag the data this style will modify
+  atomKK->sync(execution_space,datamask_read);
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+  // cache the views and counts the kernels read
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+
+  const int inum = list->inum;
+  const int ignum = inum + list->gnum; // iteration count of the second FULL pass
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+  // copymode stays set while *this is passed to Kokkos as a functor (the destructor checks it)
+  k_list->clean_copy();
+  copymode = 1;
+
+  EV_FLOAT ev; // result of the most recent reduction
+  EV_FLOAT ev_all; // running total over all launches
+  // HALF and HALFTHREAD use one launch; FULL uses pass A over inum and pass B over ignum entries
+  if (neighflag == HALF) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == HALFTHREAD) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  } else if (neighflag == FULL) {
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,1> >(0,inum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,0> >(0,inum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+    // second FULL pass
+    if (evflag)
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,1> >(0,ignum),*this,ev);
+    else
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,0> >(0,ignum),*this);
+    DeviceType::fence();
+    ev_all += ev;
+  }
+  // fold the reduced totals into the global accumulators
+  if (eflag_global) eng_vdwl += ev_all.evdwl;
+  if (vflag_global) {
+    virial[0] += ev_all.v[0];
+    virial[1] += ev_all.v[1];
+    virial[2] += ev_all.v[2];
+    virial[3] += ev_all.v[3];
+    virial[4] += ev_all.v[4];
+    virial[5] += ev_all.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+  // per-atom tallies were written on the device; mark them and copy them to the host
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Kernel for the HALF / HALFTHREAD modes, reducing overload.
+// Handles the atom d_ilist[ii]; entries with i >= nlocal are ignored.
+// Forces are written to i, j and k through a_f, whose atomicity is chosen
+// by AtomicF<NEIGHFLAG>.  When EVFLAG is set, energy and virial
+// contributions are accumulated into ev and the per-atom tallies.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  // The f array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  const int i = d_ilist[ii];
+  if (i >= nlocal) return;
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+  // tags come from a tagint view; keep their full width so that the
+  // ordering test below is not affected by truncation to int
+  const tagint itag = tag(i);
+
+  int j,k,jj,kk,jtype,ktype;
+  tagint jtag;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+    jtag = tag(j);
+
+    // the list holds both (i,j) and (j,i); keep exactly one of them,
+    // chosen by tag order and parity, with a coordinate tie-break
+    // when both atoms carry the same tag
+    if (itag > jtag) {
+      if ((itag+jtag) % 2 == 0) continue;
+    } else if (itag < jtag) {
+      if ((itag+jtag) % 2 == 1) continue;
+    } else {
+      if (x(j,2)  < ztmp) continue;
+      if (x(j,2) == ztmp && x(j,1)  < ytmp) continue;
+      if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
+    }
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    // Tersoff repulsive portion
+
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp *
+                          (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1);
+    const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+
+    // ZBL repulsive portion
+
+    const F_FLOAT esq = pow(global_e,2.0);
+    const F_FLOAT a_ij = (0.8854*global_a_0) /
+            (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23));
+    const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/
+            (4.0*MY_PI*global_epsilon_0);
+    const F_FLOAT r_ov_a = r/a_ij;
+    const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) +
+            0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a);
+    const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) -
+                              0.9423*0.5099*exp(-0.9423*r_ov_a) -
+                              0.4029*0.2802*exp(-0.4029*r_ov_a) -
+                              0.2016*0.02817*exp(-0.2016*r_ov_a));
+    const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r;
+    const F_FLOAT eng_z = premult*(1.0/r)*phi;
+
+    // combine two parts with smoothing by Fermi-like function
+
+    // eng starts at zero so that a defined value reaches ev_tally()
+    // even when only the virial was requested
+    F_FLOAT frep;
+    F_FLOAT eng = 0.0;
+    frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z +
+             (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z +
+             fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r;
+
+    if (eflag)
+      eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z +
+              fermi_k(itype,jtype,jtype,r) * eng_t;
+
+    a_f(i,0) += delx*frep;
+    a_f(i,1) += dely*frep;
+    a_f(i,2) += delz*frep;
+    a_f(j,0) -= delx*frep;
+    a_f(j,1) -= dely*frep;
+    a_f(j,2) -= delz*frep;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    // sum the contribution of every other neighbor k of i
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    a_f(i,0) += delx1*fatt;
+    a_f(i,1) += dely1*fatt;
+    a_f(i,2) += delz1*fatt;
+    a_f(j,0) -= delx1*fatt;
+    a_f(j,1) -= dely1*fatt;
+    a_f(j,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+                rik,delx2,dely2,delz2,fi,fj,fk);
+
+      a_f(i,0) += fi[0];
+      a_f(i,1) += fi[1];
+      a_f(i,2) += fi[2];
+      a_f(j,0) += fj[0];
+      a_f(j,1) += fj[1];
+      a_f(j,2) += fj[2];
+      a_f(k,0) += fk[0];
+      a_f(k,1) += fk[1];
+      a_f(k,2) += fk[2];
+
+      // NOTE(review): the three-body virial is tallied only when
+      // vflag_atom is set -- confirm this is intended
+      if (vflag_atom) {
+        F_FLOAT delrij[3], delrik[3];
+        delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+        delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+        if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  // non-reducing overload: forward to the reducing one and drop its tallies
+  typedef TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG> kernel_tag;
+  EV_FLOAT ev_discard;
+  this->template operator()<NEIGHFLAG,EVFLAG>(kernel_tag(), ii, ev_discard);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// First kernel of the FULL mode, reducing overload.
+// Handles the atom d_ilist[ii] and evaluates the terms centered on i.
+// Only f(i,*) is written here.  Energies are added with a factor 0.5
+// because each pair is visited from both of its atoms.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fi[3], fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+  const int jnum = d_numneigh[i];
+
+  // repulsive
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
+
+    if (rsq > cutsq) continue;
+
+    // Tersoff repulsive portion
+
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
+    const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
+    const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
+    const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp *
+                          (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1);
+    const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
+
+    // ZBL repulsive portion
+
+    const F_FLOAT esq = pow(global_e,2.0);
+    const F_FLOAT a_ij = (0.8854*global_a_0) /
+            (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23));
+    const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/
+            (4.0*MY_PI*global_epsilon_0);
+    const F_FLOAT r_ov_a = r/a_ij;
+    const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) +
+            0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a);
+    const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) -
+                              0.9423*0.5099*exp(-0.9423*r_ov_a) -
+                              0.4029*0.2802*exp(-0.4029*r_ov_a) -
+                              0.2016*0.02817*exp(-0.2016*r_ov_a));
+    const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r;
+    const F_FLOAT eng_z = premult*(1.0/r)*phi;
+
+    // combine two parts with smoothing by Fermi-like function
+
+    // eng starts at zero so that a defined value reaches ev_tally()
+    // even when only the virial was requested
+    F_FLOAT frep;
+    F_FLOAT eng = 0.0;
+    frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z +
+             (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z +
+             fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r;
+
+    if (eflag)
+      eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z +
+              fermi_k(itype,jtype,jtype,r) * eng_t;
+
+    f(i,0) += delx*frep;
+    f(i,1) += dely*frep;
+    f(i,2) += delz*frep;
+
+    if (EVFLAG) {
+      if (eflag)
+        ev.evdwl += 0.5*eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
+    }
+  }
+
+  // attractive: bond order
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    jtype = type(j);
+
+    delx1 = xtmp - x(j,0);
+    dely1 = ytmp - x(j,1);
+    delz1 = ztmp - x(j,2);
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(itype,jtype,jtype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    // sum the contribution of every other neighbor k of i
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
+    const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
+    const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    f(i,0) += delx1*fatt;
+    f(i,1) += dely1*fatt;
+    f(i,2) += delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag) ev.evdwl += 0.5*eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+
+    for (kk = 0; kk < jnum; kk++) {
+      if (jj == kk) continue;
+      k = d_neighbors(i,kk);
+      k &= NEIGHMASK;
+      ktype = type(k);
+
+      delx2 = xtmp - x(k,0);
+      dely2 = ytmp - x(k,1);
+      delz2 = ztmp - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(itype,jtype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
+                rik,delx2,dely2,delz2,fi,fj,fk);
+
+      // only the share acting on i is applied in this pass
+      f(i,0) += fi[0];
+      f(i,1) += fi[1];
+      f(i,2) += fi[2];
+
+      // NOTE(review): the three-body virial is tallied only when
+      // vflag_atom is set -- confirm this is intended
+      if (vflag_atom) {
+        F_FLOAT delrij[3], delrik[3];
+        delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
+        delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
+        if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
+      }
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  // non-reducing overload: forward to the reducing one and drop its tallies
+  typedef TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG> kernel_tag;
+  EV_FLOAT ev_discard;
+  this->template operator()<NEIGHFLAG,EVFLAG>(kernel_tag(), ii, ev_discard);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Second kernel of the FULL mode, reducing overload.
+// Handles the atom d_ilist[ii] and, for every local neighbor j (j < nlocal),
+// evaluates the terms centered on j (parameters are looked up as
+// (jtype,itype,...)) and applies the resulting force to i only.
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  int j,k,jj,kk,jtype,ktype,j_jnum;
+  F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
+  F_FLOAT fj[3], fk[3];
+  X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
+
+  const int jnum = d_numneigh[i];
+
+  // attractive: bond order
+
+  for (jj = 0; jj < jnum; jj++) {
+    j = d_neighbors(i,jj);
+    j &= NEIGHMASK;
+    if (j >= nlocal) continue;
+    jtype = type(j);
+
+    // note the sign: displacements here point from i to j
+    delx1 = x(j,0) - xtmp;
+    dely1 = x(j,1) - ytmp;
+    delz1 = x(j,2) - ztmp;
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    cutsq1 = paramskk(jtype,itype,itype).cutsq;
+
+    bo_ij = 0.0;
+    if (rsq1 > cutsq1) continue;
+    rij = sqrt(rsq1);
+
+    j_jnum = d_numneigh[j];
+
+    for (kk = 0; kk < j_jnum; kk++) {
+      k = d_neighbors(j,kk);
+      // strip the extra high bits before comparing with i, otherwise an
+      // entry that carries them would not be recognised as atom i
+      k &= NEIGHMASK;
+      if (k == i) continue;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
+
+    }
+
+    // attractive: pairwise potential and force
+
+    const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
+    const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
+    const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
+    const F_FLOAT fatt = -0.5*bij * dfa / rij;
+    const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
+    const F_FLOAT eng = 0.5*bij * fa;
+
+    f(i,0) -= delx1*fatt;
+    f(i,1) -= dely1*fatt;
+    f(i,2) -= delz1*fatt;
+
+    if (EVFLAG) {
+      if (eflag)
+        ev.evdwl += 0.5 * eng;
+      if (vflag_either || eflag_atom)
+        this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
+    }
+
+    // attractive: three-body force
+
+    for (kk = 0; kk < j_jnum; kk++) {
+      k = d_neighbors(j,kk);
+      // same as above: mask first, then skip atom i itself
+      k &= NEIGHMASK;
+      if (k == i) continue;
+      ktype = type(k);
+
+      delx2 = x(j,0) - x(k,0);
+      dely2 = x(j,1) - x(k,1);
+      delz2 = x(j,2) - x(k,2);
+      rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+      cutsq2 = paramskk(jtype,itype,ktype).cutsq;
+
+      if (rsq2 > cutsq2) continue;
+      rik = sqrt(rsq2);
+      ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
+                rik,delx2,dely2,delz2,fj,fk);
+      f(i,0) += fj[0];
+      f(i,1) += fj[1];
+      f(i,2) += fj[2];
+
+      // NOTE(review): the three-body virial is tallied only when
+      // vflag_atom is set -- confirm this is intended
+      if (vflag_atom) {
+        F_FLOAT delrji[3], delrjk[3];
+        delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
+        delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
+        if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
+      }
+
+      // second contribution, with the roles of i and k exchanged in the
+      // parameter lookup; fk is overwritten by ters_dthbk
+      const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
+      const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
+      ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
+                rij,delx1,dely1,delz1,fk);
+      f(i,0) += fk[0];
+      f(i,1) += fk[1];
+      f(i,2) += fk[2];
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
+  // non-reducing overload: forward to the reducing one and drop its tallies
+  typedef TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG> kernel_tag;
+  EV_FLOAT ev_discard;
+  this->template operator()<NEIGHFLAG,EVFLAG>(kernel_tag(), ii, ev_discard);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
+                const int &k, const F_FLOAT &r) const
+{
+  // cutoff function: 1 below bigr-bigd, 0 above bigr+bigd,
+  // sine-shaped switch in between
+  const F_FLOAT center = paramskk(i,j,k).bigr;
+  const F_FLOAT halfwidth = paramskk(i,j,k).bigd;
+
+  double fc;
+  if (r < center-halfwidth) fc = 1.0;
+  else if (r > center+halfwidth) fc = 0.0;
+  else fc = 0.5*(1.0 - sin(MY_PI2*(r - center)/halfwidth));
+  return fc;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
+                const int &k, const F_FLOAT &r) const
+{
+  // derivative of the cutoff function; zero outside the switching window
+  const F_FLOAT center = paramskk(i,j,k).bigr;
+  const F_FLOAT halfwidth = paramskk(i,j,k).bigd;
+
+  const bool below = (r < center-halfwidth);
+  const bool above = (r > center+halfwidth);
+  if (below || above) return 0.0;
+
+  return -(MY_PI4/halfwidth) * cos(MY_PI2*(r - center)/halfwidth);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
+        const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+        const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const
+{
+  // contribution of one neighbor k to the bond order of the i-j bond:
+  // cutoff(rik) * angular term * exponential in (rij - rik)
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
+
+  // exponent is linear in (rij-rik), or its cube when powerm is 3
+  F_FLOAT arg = p.lam3 * (rij-rik);
+  if (int(p.powerm) == 3) arg = pow(p.lam3 * (rij-rik),3.0);
+
+  // clamp the exponential at both ends
+  F_FLOAT ex_delr = 0.0;
+  if (arg > 69.0776) ex_delr = 1.e30;
+  else if (!(arg < -69.0776)) ex_delr = exp(arg);
+
+  return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::
+        ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  // angular term: gamma * (1 + c^2/d^2 - c^2/(d^2 + (h - cos)^2))
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT c_sq = p.c * p.c;
+  const F_FLOAT d_sq = p.d * p.d;
+  const F_FLOAT offset = p.h - cos;
+
+  const double bracket = 1.0 + c_sq/d_sq - c_sq/(d_sq+offset*offset);
+  return p.gamma*bracket;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::
+        ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
+{
+  // derivative of the angular term with respect to cos:
+  // gamma * (-2 c^2 (h - cos)) / (d^2 + (h - cos)^2)^2
+  const params_ters &p = paramskk(i,j,k);
+
+  const F_FLOAT c_sq = p.c * p.c;
+  const F_FLOAT d_sq = p.d * p.d;
+  const F_FLOAT offset = p.h - cos;
+
+  const F_FLOAT top = -2.0 * c_sq * offset;
+  const F_FLOAT inv_bottom = 1.0/(d_sq + offset*offset);
+
+  return p.gamma * top * inv_bottom * inv_bottom;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
+                const int &k, const F_FLOAT &r) const
+{
+  // attractive pair term: -B exp(-lam2 r), scaled by the cutoff and the
+  // Fermi-like switching function; zero beyond bigr+bigd
+  const params_ters &p = paramskk(i,j,k);
+
+  const bool beyond_cutoff = (r > p.bigr + p.bigd);
+  if (beyond_cutoff) return 0.0;
+
+  return -p.bigb * exp(-p.lam2 * r)
+          * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
+                const int &k, const F_FLOAT &r) const
+{
+  // derivative of the attractive pair term (product rule over the
+  // exponential, the cutoff and the Fermi-like function); zero beyond
+  // bigr+bigd
+  const params_ters &p = paramskk(i,j,k);
+  if (r > p.bigr + p.bigd) return 0.0;
+
+  const double fc = ters_fc_k(i,j,k,r);
+  const double dfc = ters_dfc(i,j,k,r);
+  const double fermi = fermi_k(i,j,k,r);
+  const double dfermi = fermi_d_k(i,j,k,r);
+
+  return p.bigb * exp(-p.lam2 * r) *
+    (p.lam2 * fc * fermi -
+     dfc * fermi - fc *
+     dfermi);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
+                const int &k, const F_FLOAT &bo) const
+{
+  // bond-order function of beta*bo; the thresholds c1..c4 select
+  // alternative closed forms before falling back to the general one
+  const params_ters &p = paramskk(i,j,k);
+  const F_FLOAT scaled = p.beta * bo;
+
+  if (scaled > p.c1) {
+    return 1.0/sqrt(scaled);
+  } else if (scaled > p.c2) {
+    return (1.0 - pow(scaled,-p.powern) / (2.0*p.powern))/sqrt(scaled);
+  } else if (scaled < p.c4) {
+    return 1.0;
+  } else if (scaled < p.c3) {
+    return 1.0 - pow(scaled,p.powern)/(2.0*p.powern);
+  }
+
+  return pow(1.0 + pow(scaled,p.powern), -1.0/(2.0*p.powern));
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
+                const int &k, const F_FLOAT &bo) const
+{
+  // derivative of the bond-order function with respect to bo, using the
+  // same c1..c4 threshold branches as ters_bij_k
+  const params_ters &p = paramskk(i,j,k);
+  const F_FLOAT scaled = p.beta * bo;
+
+  if (scaled > p.c1) {
+    return p.beta * -0.5*pow(scaled,-1.5);
+  } else if (scaled > p.c2) {
+    return p.beta * (-0.5*pow(scaled,-1.5) *
+           (1.0 - 0.5*(1.0 +  1.0/(2.0*p.powern)) *
+           pow(scaled,-p.powern)));
+  } else if (scaled < p.c4) {
+    return 0.0;
+  } else if (scaled < p.c3) {
+    return -0.5*p.beta * pow(scaled,p.powern-1.0);
+  }
+
+  // general form
+  const F_FLOAT scaled_n = pow(scaled,p.powern);
+  return -0.5 * pow(1.0+scaled_n, -1.0-(1.0/(2.0*p.powern)))*scaled_n / bo;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::ters_dthb(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
+{
+  // Writes the three-body terms for the (i,j,k) parameter set into fi, fj and
+  // fk (three components each), every one scaled by prefactor.
+  const params_ters &prm = paramskk(i,j,k);
+
+  // (dx,dy,dz)/r for both displacements -- from PairTersoffZBL::attractive
+  const F_FLOAT inv_rij = 1.0/rij, inv_rik = 1.0/rik;
+  F_FLOAT vec_ij[3], vec_ik[3], hat_ij[3], hat_ik[3];
+  vec_ij[0] = dx1; vec_ij[1] = dy1; vec_ij[2] = dz1;
+  vec_ik[0] = dx2; vec_ik[1] = dy2; vec_ik[2] = dz2;
+  vec3_scale(inv_rij,vec_ij,hat_ij);
+  vec3_scale(inv_rik,vec_ik,hat_ik);
+
+  // cutoff term and its derivative at rik -- from PairTersoffZBL::ters_zetaterm_d
+  const F_FLOAT cut = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dcut = ters_dfc(i,j,k,rik);
+
+  // exponent is lam3*(rij-rik), cubed when int(powerm) == 3
+  const bool cubic = (int(prm.powerm) == 3);
+  F_FLOAT arg, expo, dexpo;
+  if (cubic) arg = pow(prm.lam3 * (rij-rik),3.0);
+  else arg = prm.lam3 * (rij-rik);
+
+  // exp() is only called for |arg| <= 69.0776; beyond that fixed values are used
+  if (arg > 69.0776) expo = 1.e30;
+  else if (arg < -69.0776) expo = 0.0;
+  else expo = exp(arg);
+  if (cubic) dexpo = 3.0*pow(prm.lam3,3.0) * pow(rij-rik,2.0)*expo;
+  else dexpo = prm.lam3 * expo;
+
+  // angular term and its derivative, both functions of the cosine
+  const F_FLOAT costheta = vec3_dot(hat_ij,hat_ik);
+  const F_FLOAT g = ters_gijk(i,j,k,costheta);
+  const F_FLOAT dg = ters_dgijk(i,j,k,costheta);
+
+  // cosine gradients for j, k and i = -(j+k) -- from PairTersoffZBL::costheta_d
+  F_FLOAT dcos_i[3], dcos_j[3], dcos_k[3];
+  vec3_scaleadd(-costheta,hat_ij,hat_ik,dcos_j);
+  vec3_scale(inv_rij,dcos_j,dcos_j);
+  vec3_scaleadd(-costheta,hat_ik,hat_ij,dcos_k);
+  vec3_scale(inv_rik,dcos_k,dcos_k);
+  vec3_add(dcos_j,dcos_k,dcos_i);
+  vec3_scale(-1.0,dcos_i,dcos_i);
+
+  // output for atom i
+  vec3_scale(-dcut*g*expo,hat_ik,fi);
+  vec3_scaleadd(cut*dg*expo,dcos_i,fi,fi);
+  vec3_scaleadd(cut*g*dexpo,hat_ik,fi,fi);
+  vec3_scaleadd(-cut*g*dexpo,hat_ij,fi,fi);
+  vec3_scale(prefactor,fi,fi);
+
+  // output for atom j
+  vec3_scale(cut*dg*expo,dcos_j,fj);
+  vec3_scaleadd(cut*g*dexpo,hat_ij,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+
+  // output for atom k
+  vec3_scale(dcut*g*expo,hat_ik,fk);
+  vec3_scaleadd(cut*dg*expo,dcos_k,fk,fk);
+  vec3_scaleadd(-cut*g*dexpo,hat_ik,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::ters_dthbj(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fj, F_FLOAT *fk) const
+{
+  // Same computation as ters_dthb restricted to the fj and fk outputs.  The
+  // cosine gradient for atom i only feeds fi, so it is not formed here.
+  const params_ters &prm = paramskk(i,j,k);
+  const F_FLOAT rijinv = 1.0/rij, rikinv = 1.0/rik;
+  F_FLOAT delrij[3], delrik[3], rij_hat[3], rik_hat[3];
+
+  // (dx,dy,dz)/r for both displacements
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+  vec3_scale(rijinv,delrij,rij_hat);
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  // cutoff term and its derivative at rik
+  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dfc = ters_dfc(i,j,k,rik);
+
+  // exponent lam3*(rij-rik), cubed when int(powerm) == 3; exp() only runs for |tmp| <= 69.0776
+  const bool cubic = (int(prm.powerm) == 3);
+  F_FLOAT tmp, ex_delr, dex_delr;
+  if (cubic) tmp = pow(prm.lam3 * (rij-rik),3.0);
+  else tmp = prm.lam3 * (rij-rik);
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+  if (cubic) dex_delr = 3.0*pow(prm.lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = prm.lam3 * ex_delr;
+
+  // angular term and its derivative, both functions of the cosine
+  const F_FLOAT costheta = vec3_dot(rij_hat,rik_hat);
+  const F_FLOAT gijk = ters_gijk(i,j,k,costheta);
+  const F_FLOAT dgijk = ters_dgijk(i,j,k,costheta);
+
+  // cosine gradients for atoms j and k
+  F_FLOAT dcosfj[3], dcosfk[3];
+  vec3_scaleadd(-costheta,rij_hat,rik_hat,dcosfj);
+  vec3_scale(rijinv,dcosfj,dcosfj);
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+
+  // output for atom j
+  vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
+  vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
+  vec3_scale(prefactor,fj,fj);
+
+  // output for atom k
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::ters_dthbk(
+	const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	F_FLOAT *fk) const
+{
+  // Same computation as ters_dthb restricted to the fk output.  Only the
+  // cosine gradient for atom k is needed; those for atoms i and j feed fi and
+  // fj only and are not formed here.
+  const params_ters &prm = paramskk(i,j,k);
+  const F_FLOAT rijinv = 1.0/rij, rikinv = 1.0/rik;
+  F_FLOAT delrij[3], delrik[3], rij_hat[3], rik_hat[3];
+
+  // (dx,dy,dz)/r for both displacements
+  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
+  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
+  vec3_scale(rijinv,delrij,rij_hat);
+  vec3_scale(rikinv,delrik,rik_hat);
+
+  // cutoff term and its derivative at rik
+  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
+  const F_FLOAT dfc = ters_dfc(i,j,k,rik);
+
+  // exponent is lam3*(rij-rik), cubed when int(powerm) == 3
+  const bool cubic = (int(prm.powerm) == 3);
+  F_FLOAT tmp, ex_delr, dex_delr;
+  if (cubic) tmp = pow(prm.lam3 * (rij-rik),3.0);
+  else tmp = prm.lam3 * (rij-rik);
+
+  // exp() is only called for |tmp| <= 69.0776; beyond that fixed values are used
+  if (tmp > 69.0776) ex_delr = 1.e30;
+  else if (tmp < -69.0776) ex_delr = 0.0;
+  else ex_delr = exp(tmp);
+  if (cubic) dex_delr = 3.0*pow(prm.lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
+  else dex_delr = prm.lam3 * ex_delr;
+
+  // angular term and its derivative, both functions of the cosine
+  const F_FLOAT costheta = vec3_dot(rij_hat,rik_hat);
+  const F_FLOAT gijk = ters_gijk(i,j,k,costheta);
+  const F_FLOAT dgijk = ters_dgijk(i,j,k,costheta);
+
+  // cosine gradient for atom k
+  F_FLOAT dcosfk[3];
+  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
+  vec3_scale(rikinv,dcosfk,dcosfk);
+
+  // output for atom k
+  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
+  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
+  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
+  vec3_scale(prefactor,fk,fk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::fermi_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const
+{
+  // 1 / (1 + exp(-ZBLexpscale*(r - ZBLcut))) with both constants taken from paramskk(i,j,k)
+  const params_ters &prm = paramskk(i,j,k);
+  return 1.0 / (1.0 + exp(-prm.ZBLexpscale * (r - prm.ZBLcut)));
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double PairTersoffZBLKokkos<DeviceType>::fermi_d_k(const int &i, const int &j,
+		const int &k, const F_FLOAT &r) const
+{
+  // r-derivative of fermi_k: s*e/(1+e)^2 with s = ZBLexpscale, e = exp(-s*(r - ZBLcut))
+  const params_ters &prm = paramskk(i,j,k);
+  const F_FLOAT e = exp(-prm.ZBLexpscale * (r - prm.ZBLcut));
+  return prm.ZBLexpscale * e / pow(1.0 + e,2.0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  // Tallies one pair term: per-atom energy into k_eatom (if eflag_atom), the
+  // global virial into ev.v (if vflag_global) and the per-atom virial into
+  // k_vatom (if vflag_atom).  When NEIGHFLAG == FULL only atom i receives the
+  // per-atom shares and the global virial is added with a factor 0.5.
+
+  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
+  // (the memory trait of both views is selected by AtomicF<NEIGHFLAG>)
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,
+               Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> >
+    v_eatom = k_eatom.view<DeviceType>();
+
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,
+               Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> >
+    v_vatom = k_vatom.view<DeviceType>();
+
+  // per-atom energy: half of epair for i, and for j unless the list is FULL
+  if (eflag_atom) {
+    const E_FLOAT half_e = 0.5 * epair;
+    v_eatom[i] += half_e;
+    if (NEIGHFLAG != FULL) v_eatom[j] += half_e;
+  }
+
+  // everything below is guarded by vflag_either
+  if (!vflag_either) return;
+
+  // pair virial components in the order xx, yy, zz, xy, xz, yz
+  E_FLOAT vir[6];
+  vir[0] = delx*delx*fpair;
+  vir[1] = dely*dely*fpair;
+  vir[2] = delz*delz*fpair;
+  vir[3] = delx*dely*fpair;
+  vir[4] = delx*delz*fpair;
+  vir[5] = dely*delz*fpair;
+
+  if (vflag_global) {
+    // full weight, or half weight for a FULL list
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++)
+        ev.v[n] += vir[n];
+    } else {
+      for (int n = 0; n < 6; n++)
+        ev.v[n] += 0.5*vir[n];
+    }
+  }
+
+  if (vflag_atom) {
+    // atom i always gets half of each component
+    for (int n = 0; n < 6; n++)
+      v_vatom(i,n) += 0.5*vir[n];
+
+    // atom j gets the other half unless the list is FULL
+    if (NEIGHFLAG != FULL) {
+      for (int n = 0; n < 6; n++)
+        v_vatom(j,n) += 0.5*vir[n];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+	F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
+{
+  // Each component is THIRD * (drij[a]*fj[b] + drik[a]*fk[b]).  The result is
+  // added to ev.v (if vflag_global) and to k_vatom (if vflag_atom) for atom i
+  // and, unless the list is FULL, for atoms j and k as well.
+
+  // The vatom array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,
+               Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> >
+    v_vatom = k_vatom.view<DeviceType>();
+
+  // components in the order xx, yy, zz, xy, xz, yz
+  F_FLOAT vir[6];
+  vir[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]);
+  vir[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]);
+  vir[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]);
+  vir[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]);
+  vir[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]);
+  vir[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]);
+
+  if (vflag_global) {
+    for (int n = 0; n < 6; n++)
+      ev.v[n] += vir[n];
+  }
+
+  if (!vflag_atom) return;
+
+  for (int n = 0; n < 6; n++)
+    v_vatom(i,n) += vir[n];
+  if (NEIGHFLAG != FULL) {
+    for (int n = 0; n < 6; n++)
+      v_vatom(j,n) += vir[n];
+    for (int n = 0; n < 6; n++)
+      v_vatom(k,n) += vir[n];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairTersoffZBLKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
+{
+  // Same component formula as v_tally3, but the per-atom part goes to d_vatom
+  // for atom i only; the j and k arguments are not used.
+
+  // components in the order xx, yy, zz, xy, xz, yz
+  F_FLOAT vir[6];
+  vir[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
+  vir[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
+  vir[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
+  vir[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
+  vir[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
+  vir[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
+
+  if (vflag_global) {
+    for (int n = 0; n < 6; n++)
+      ev.v[n] += vir[n];
+  }
+
+  if (vflag_atom) {
+    // d_vatom is used directly here, not an AtomicF-qualified view as in v_tally3
+    for (int n = 0; n < 6; n++)
+      d_vatom(i,n) += vir[n];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+int PairTersoffZBLKokkos<DeviceType>::sbmask(const int& j) const {
+  return (j >> SBBITS) & 3;  // the two bits of j starting at bit position SBBITS
+}
+
+template class PairTersoffZBLKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#ifdef KOKKOS_HAVE_CUDA
+template class PairTersoffZBLKokkos<LMPHostType>;  // host type too, only when KOKKOS_HAVE_CUDA is defined (presumably the two types coincide otherwise -- confirm)
+#endif
diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.h b/src/KOKKOS/pair_tersoff_zbl_kokkos.h
new file mode 100755
index 0000000000..7bc88759c7
--- /dev/null
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h
@@ -0,0 +1,231 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/zbl/kk,PairTersoffZBLKokkos<LMPDeviceType>)  // plain /kk name maps to the device-type class
+PairStyle(tersoff/zbl/kk/device,PairTersoffZBLKokkos<LMPDeviceType>)  // explicit /device name, same class as /kk
+PairStyle(tersoff/zbl/kk/host,PairTersoffZBLKokkos<LMPHostType>)  // /host name maps to the host-type class
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_ZBL_KOKKOS_H
+#define LMP_PAIR_TERSOFF_ZBL_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_tersoff_zbl.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffZBLComputeHalf{};  // empty tag type: first argument of the ComputeHalf operator() overloads
+
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffZBLComputeFullA{};  // empty tag type: first argument of the ComputeFullA operator() overloads
+
+template<int NEIGHFLAG, int EVFLAG>
+struct TagPairTersoffZBLComputeFullB{};  // empty tag type: first argument of the ComputeFullB operator() overloads
+
+// Kokkos version of PairTersoffZBL, templated on the Kokkos device type.  Besides
+// compute()/init_style() it declares tagged operator() overloads and the
+// KOKKOS_INLINE_FUNCTION helpers (potential terms, 3-vector math, tallies).
+template<class DeviceType>
+class PairTersoffZBLKokkos : public PairTersoffZBL {
+ public:
+  enum {EnabledNeighFlags=FULL};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+
+  PairTersoffZBLKokkos(class LAMMPS *);
+  virtual ~PairTersoffZBLKokkos();
+  virtual void compute(int, int);
+  void init_style();
+  // tagged operator() overloads; each tag has one form with and one without an EV_FLOAT& argument
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
+  // scalar terms of the potential; i, j, k index the parameter table paramskk
+  KOKKOS_INLINE_FUNCTION
+  double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double bondorder(const int &i, const int &j, const int &k,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const;
+  // three-body terms: ters_dthb writes fi, fj and fk, ters_dthbj only fj and fk, ters_dthbk only fk
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fj, F_FLOAT *fk) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
+	      const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
+	      const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
+	      F_FLOAT *fk) const;
+  // 3-vector helpers; every array argument is F_FLOAT so the F_FLOAT buffers passed by the ters_dthb* routines bind for any F_FLOAT
+  KOKKOS_INLINE_FUNCTION
+  double vec3_dot(const F_FLOAT x[3], const F_FLOAT y[3]) const {
+    return x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void vec3_add(const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scale(const F_FLOAT k, const F_FLOAT x[3], F_FLOAT y[3]) const {
+    y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void vec3_scaleadd(const F_FLOAT k, const F_FLOAT x[3], const F_FLOAT y[3], F_FLOAT * const z) const {
+    z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  int sbmask(const int& j) const;
+  // parameter record stored per (i,j,k) triple; both constructors set every field to zero (the int argument is ignored)
+  struct params_ters{
+    params_ters(){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0;
+	    	  bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;};
+    params_ters(int i){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0;
+	    	  bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;};
+    F_FLOAT powerm, gamma, lam3, c, d, h, powern, beta, lam2, bigb, bigr,
+	    bigd, lam1, biga, cutsq, c1, c2, c3, c4, Z_i, Z_j, ZBLcut, ZBLexpscale;
+  };
+  // energy / virial accumulation helpers
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+  template<int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+		F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k,
+		F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const;
+
+  void allocate();
+  void setup();
+
+  KOKKOS_INLINE_FUNCTION
+  double fermi_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double fermi_d_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const;
+
+ protected:
+  void cleanup_copy();
+
+  typedef Kokkos::DualView<int***,DeviceType> tdual_int_3d;
+  Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType> k_params;  // 3-d table of parameter records
+  typename Kokkos::DualView<params_ters***,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const paramskk;  // const device view type of the same DualView; read as paramskk(i,j,k)
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_tagint_1d tag;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  DAT::t_efloat_1d d_eatom;
+  DAT::t_virial_array d_vatom;
+
+  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
+  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+
+  class AtomKokkos *atomKK;
+  int neighflag,newton_pair;
+  int nlocal,nall,eflag,vflag;
+
+  // ZBL
+  F_FLOAT global_a_0;                // Bohr radius for Coulomb repulsion
+  F_FLOAT global_epsilon_0;        // permittivity of vacuum for Coulomb repulsion
+  F_FLOAT global_e;                // proton charge (negative of electron charge)
+
+  friend void pair_virial_fdotr_compute<PairTersoffZBLKokkos>(PairTersoffZBLKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/