From 10a3e857963165350c4d72d5a665678207926425 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Wed, 22 Apr 2020 11:53:08 -0600
Subject: [PATCH 01/44] C1 JT 042220 - added biquadratic pair/spin exchange

---
 .../llg_exchange.py                           |   2 +-
 src/SPIN/pair_spin_exchange.cpp               |  24 +-
 src/SPIN/pair_spin_exchange_biquadratic.cpp   | 594 ++++++++++++++++++
 src/SPIN/pair_spin_exchange_biquadratic.h     |  85 +++
 4 files changed, 695 insertions(+), 10 deletions(-)
 create mode 100644 src/SPIN/pair_spin_exchange_biquadratic.cpp
 create mode 100644 src/SPIN/pair_spin_exchange_biquadratic.h

diff --git a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
index 49eecb5b44..dd1c543bb3 100755
--- a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
+++ b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
@@ -65,6 +65,6 @@ for t in range (0,N):
   # calc. average magnetization
   Sm = (S1+S2)*0.5
   # calc. energy
-  en = -2.0*J0*(np.dot(S1,S2))
+  en = -J0*(np.dot(S1,S2))
   # print res. in ps for comparison with LAMMPS
   print(t*dt/1000.0,Sm[0],Sm[1],Sm[2],en)
diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp
index 5c5d5cb1a4..b23f4fa0cb 100644
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@@ -231,9 +231,15 @@ void PairSpinExchange::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_exchange(i,j,rsq,fmi,spj);
-        if (lattice_flag) {
+        
+        if (lattice_flag)
           compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
-        }
+        
+        if (eflag) {
+          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+          evdwl *= 0.5*hbar;
+          emag[i] += evdwl;
+        } else evdwl = 0.0;
       }
 
       f[i][0] += fi[0];
@@ -243,11 +249,11 @@ void PairSpinExchange::compute(int eflag, int vflag)
       fm[i][1] += fmi[1];
       fm[i][2] += fmi[2];
 
-      if (eflag) {
-        evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-        evdwl *= 0.5*hbar;
-        emag[i] += evdwl;
-      } else evdwl = 0.0;
+      // if (eflag) {
+      //   evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+      //   evdwl *= 0.5*hbar;
+      //   emag[i] += evdwl;
+      // } else evdwl = 0.0;
 
       if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
           evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
@@ -364,8 +370,8 @@ void PairSpinExchange::compute_exchange(int i, int j, double rsq, double fmi[3],
    compute the mechanical force due to the exchange interaction between atom i and atom j
 ------------------------------------------------------------------------- */
 
-void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, double eij[3],
-    double fi[3],  double spi[3], double spj[3])
+void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, 
+    double eij[3], double fi[3],  double spi[3], double spj[3])
 {
   int *type = atom->type;
   int itype, jtype;
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
new file mode 100644
index 0000000000..a7f64690af
--- /dev/null
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -0,0 +1,594 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ------------------------------------------------------------------------
+   Contributing authors: Julien Tranchida (SNL)
+                         Aidan Thompson (SNL)
+
+   Please cite the related publication:
+   Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018).
+   Massively parallel symplectic algorithm for coupled magnetic spin dynamics
+   and molecular dynamics. Journal of Computational Physics.
+------------------------------------------------------------------------- */
+
+#include "pair_spin_exchange_biquadratic.h"
+#include <mpi.h>
+#include <cmath>
+#include <cstring>
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "fix.h"
+#include "force.h"
+#include "neigh_list.h"
+#include "memory.h"
+#include "modify.h"
+#include "update.h"
+#include "utils.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairSpinExchangeBiquadratic::~PairSpinExchangeBiquadratic()
+{
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cut_spin_exchange);
+    memory->destroy(J1_mag);
+    memory->destroy(J1_mech);
+    memory->destroy(J2);
+    memory->destroy(J3);
+    memory->destroy(K1_mag);
+    memory->destroy(K1_mech);
+    memory->destroy(K2);
+    memory->destroy(K3);
+    memory->destroy(cutsq); // to be implemented
+    memory->destroy(emag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::settings(int narg, char **arg)
+{
+  PairSpin::settings(narg,arg);
+
+  cut_spin_exchange_global = force->numeric(FLERR,arg[0]);
+
+  // reset every cutoff that was explicitly set to the new global value,
+  // including same-type (i,i) pairs, which coeff() also flags in setflag
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (setflag[i][j]) {
+          cut_spin_exchange[i][j] = cut_spin_exchange_global;
+        }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type spin pairs
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  // validate the arg count before reading arg[2], so that a short
+  // pair_coeff line raises an error instead of reading past the end of arg
+  if (narg != 10)
+    error->all(FLERR,"Incorrect args in pair_style command");
+  if (strcmp(arg[2],"biquadratic") != 0)
+    error->all(FLERR,"Incorrect args in pair_style command");
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
+
+  // get exchange arguments from input command:
+  // arg[3] = cutoff, arg[4..6] = j1 j2 j3, arg[7..9] = k1 k2 k3
+  const double rc = force->numeric(FLERR,arg[3]);
+  const double j1 = force->numeric(FLERR,arg[4]);
+  const double j2 = force->numeric(FLERR,arg[5]);
+  const double j3 = force->numeric(FLERR,arg[6]);
+  const double k1 = force->numeric(FLERR,arg[7]);
+  const double k2 = force->numeric(FLERR,arg[8]);
+  const double k3 = force->numeric(FLERR,arg[9]);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      cut_spin_exchange[i][j] = rc;
+      J1_mag[i][j] = j1/hbar;
+      J1_mech[i][j] = j1;
+      J2[i][j] = j2;
+      J3[i][j] = j3;
+      K1_mag[i][j] = k1/hbar;
+      K1_mech[i][j] = k1;
+      K2[i][j] = k2;
+      K3[i][j] = k3;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args in pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairSpinExchangeBiquadratic::init_one(int i, int j)
+{
+
+   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+
+  J1_mag[j][i] = J1_mag[i][j];
+  J1_mech[j][i] = J1_mech[i][j];
+  J2[j][i] = J2[i][j];
+  J3[j][i] = J3[i][j];
+  K1_mag[j][i] = K1_mag[i][j];
+  K1_mech[j][i] = K1_mech[i][j];
+  K2[j][i] = K2[i][j];
+  K3[j][i] = K3[i][j];
+  cut_spin_exchange[j][i] = cut_spin_exchange[i][j];
+
+  return cut_spin_exchange_global;
+}
+
+/* ----------------------------------------------------------------------
+   extract the larger cutoff
+------------------------------------------------------------------------- */
+
+void *PairSpinExchangeBiquadratic::extract(const char *str, int &dim)
+{
+  dim = 0;
+  if (strcmp(str,"cut") == 0) return (void *) &cut_spin_exchange_global;
+  return NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::compute(int eflag, int vflag)
+{
+  int i,j,ii,jj,inum,jnum,itype,jtype;
+  double evdwl, ecoul;
+  double xi[3], eij[3];
+  double delx,dely,delz;
+  double spi[3], spj[3];
+  double fi[3], fmi[3];
+  double local_cut2;
+  double rsq, inorm;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+  ev_init(eflag,vflag);
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double **fm = atom->fm;
+  double **sp = atom->sp;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // checking size of emag
+
+  if (nlocal_max < nlocal) {    // grow emag lists if necessary
+    nlocal_max = nlocal;
+    memory->grow(emag,nlocal_max,"pair/spin:emag");
+  }
+
+  // computation of the exchange interaction
+  // loop over atoms and their neighbors
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    itype = type[i];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    xi[0] = x[i][0];
+    xi[1] = x[i][1];
+    xi[2] = x[i][2];
+    spi[0] = sp[i][0];
+    spi[1] = sp[i][1];
+    spi[2] = sp[i][2];
+    emag[i] = 0.0;
+
+    // loop on neighbors
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = type[j];
+
+      spj[0] = sp[j][0];
+      spj[1] = sp[j][1];
+      spj[2] = sp[j][2];
+
+      evdwl = 0.0;
+      fi[0] = fi[1] = fi[2] = 0.0;
+      fmi[0] = fmi[1] = fmi[2] = 0.0;
+
+      delx = xi[0] - x[j][0];
+      dely = xi[1] - x[j][1];
+      delz = xi[2] - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      inorm = 1.0/sqrt(rsq);
+      eij[0] = -inorm*delx;
+      eij[1] = -inorm*dely;
+      eij[2] = -inorm*delz;
+
+      local_cut2 = cut_spin_exchange[itype][jtype]*cut_spin_exchange[itype][jtype];
+
+      // compute exchange interaction
+
+      if (rsq <= local_cut2) {
+        compute_exchange(i,j,rsq,fmi,spi,spj);
+        if (lattice_flag)
+          compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
+      
+        if (eflag) {
+          evdwl -= compute_energy(i,j,rsq,spi,spj);
+          emag[i] += evdwl;
+        } else evdwl = 0.0;
+      }
+
+      f[i][0] += fi[0];
+      f[i][1] += fi[1];
+      f[i][2] += fi[2];
+      fm[i][0] += fmi[0];
+      fm[i][1] += fmi[1];
+      fm[i][2] += fmi[2];
+
+      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+          evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+
+}
+
+/* ----------------------------------------------------------------------
+   update the pair interactions fmi acting on the spin ii
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::compute_single_pair(int ii, double fmi[3])
+{
+  int *type = atom->type;
+  double **x = atom->x;
+  double **sp = atom->sp;
+  double local_cut2;
+  double xi[3];
+  double delx,dely,delz;
+  double spi[3],spj[3];
+
+  int j,jnum,itype,jtype,ntypes;
+  int k,locflag;
+  int *jlist,*numneigh,**firstneigh;
+
+  double rsq;
+
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // check if interaction applies to type of ii
+
+  itype = type[ii];
+  ntypes = atom->ntypes;
+  locflag = 0;
+  k = 1;
+  while (k <= ntypes) {
+    if (k <= itype) {
+      if (setflag[k][itype] == 1) {
+        locflag =1;
+        break;
+      }
+      k++;
+    } else if (k > itype) {
+      if (setflag[itype][k] == 1) {
+        locflag =1;
+        break;
+      }
+      k++;
+    } else error->all(FLERR,"Wrong type number");
+  }
+
+  // if interaction applies to type ii,
+  // locflag = 1 and compute pair interaction
+
+  if (locflag == 1) {
+
+    xi[0] = x[ii][0];
+    xi[1] = x[ii][1];
+    xi[2] = x[ii][2];
+    spi[0] = sp[ii][0];
+    spi[1] = sp[ii][1];
+    spi[2] = sp[ii][2];
+
+    jlist = firstneigh[ii];
+    jnum = numneigh[ii];
+
+    for (int jj = 0; jj < jnum; jj++) {
+
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = type[j];
+      local_cut2 = cut_spin_exchange[itype][jtype]*cut_spin_exchange[itype][jtype];
+
+      spj[0] = sp[j][0];
+      spj[1] = sp[j][1];
+      spj[2] = sp[j][2];
+
+      delx = xi[0] - x[j][0];
+      dely = xi[1] - x[j][1];
+      delz = xi[2] - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq <= local_cut2) {
+        compute_exchange(ii,j,rsq,fmi,spi,spj);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute exchange interaction between spins i and j
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, 
+    double fmi[3], double spi[3], double spj[3])
+{
+  int *type = atom->type;
+  int itype,jtype;
+  double Jex,Kex,ra,sdots;
+  double rj,rk,r2j,r2k,ir3j,ir3k;
+  itype = type[i];
+  jtype = type[j];
+
+  ra = sqrt(rsq);
+  rj = ra/J3[itype][jtype];
+  r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
+  ir3j = 1.0/(rj*rj*rj);
+  rk = ra/K3[itype][jtype];
+  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
+  ir3k = 1.0/(rk*rk*rk);
+  
+  // modified Yukawa
+  Jex = (1.0-J2[itype][jtype]*r2j);
+  Jex *= J1_mag[itype][jtype]*ir3j;
+  Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+  
+  Kex = (1.0-K2[itype][jtype]*r2k);
+  Kex *= K1_mag[itype][jtype]*ir3k;
+  Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+ 
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+
+  fmi[0] += Jex*spj[0] + 2.0*Kex*spj[0]*sdots;
+  fmi[1] += Jex*spj[1] + 2.0*Kex*spj[1]*sdots;
+  fmi[2] += Jex*spj[2] + 2.0*Kex*spj[2]*sdots;
+}
+
+/* ----------------------------------------------------------------------
+   compute the mechanical force due to the exchange interaction between atom i and atom j
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq, 
+    double eij[3], double fi[3],  double spi[3], double spj[3])
+{
+  int *type = atom->type;
+  int itype,jtype;
+  double Jex,Jex_mech,Kex,Kex_mech,ra,sdots;
+  double rj,rk,r2j,r2k,ir3j,ir3k;
+  itype = type[i];
+  jtype = type[j];
+
+  ra = sqrt(rsq);
+  rj = ra/J3[itype][jtype];
+  r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
+  ir3j = 1.0/(rj*rj*rj);
+  rk = ra/K3[itype][jtype];
+  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
+  ir3k = 1.0/(rk*rk*rk);
+  
+  // modified Yukawa
+  Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]);
+  Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j);
+  Jex_mech *= -J1_mech[itype][jtype]*ir3j;
+  Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+
+  Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]);
+  Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k);
+  Kex_mech *= -K1_mech[itype][jtype]*ir3k;
+  Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+  
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+
+  fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
+  fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
+  fi[2] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
+}
+
+/* ----------------------------------------------------------------------
+   compute energy of spin pair i and j
+------------------------------------------------------------------------- */
+
+double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, 
+    double spi[3], double spj[3])
+{
+  int *type = atom->type;
+  int itype,jtype;
+  double Jex,Kex,ra,sdots;
+  double rj,rk,r2j,r2k,ir3j,ir3k;
+  double energy = 0.0;
+  itype = type[i];
+  jtype = type[j];
+
+  ra = sqrt(rsq);
+  rj = ra/J3[itype][jtype];
+  r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
+  ir3j = 1.0/(rj*rj*rj);
+  rk = ra/K3[itype][jtype];
+  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
+  ir3k = 1.0/(rk*rk*rk);
+  
+  // modified Yukawa
+  Jex = (1.0-J2[itype][jtype]*r2j);
+  Jex *= J1_mech[itype][jtype]*ir3j;
+  Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+  
+  Kex = (1.0-K2[itype][jtype]*r2k);
+  Kex *= K1_mech[itype][jtype]*ir3k;
+  Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
+
+  energy = 0.5*(Jex*sdots + Kex*sdots*sdots);
+  return energy;
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag,n+1,n+1,"pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+  // one (n+1)x(n+1) table per coefficient; each memory tag names its own array
+  memory->create(cut_spin_exchange,n+1,n+1,"pair/spin/exchange:cut_spin_exchange");
+  memory->create(J1_mag,n+1,n+1,"pair/spin/exchange:J1_mag");
+  memory->create(J1_mech,n+1,n+1,"pair/spin/exchange:J1_mech");
+  memory->create(J2,n+1,n+1,"pair/spin/exchange:J2");
+  memory->create(J3,n+1,n+1,"pair/spin/exchange:J3");
+  memory->create(K1_mag,n+1,n+1,"pair/spin/exchange:K1_mag");
+  memory->create(K1_mech,n+1,n+1,"pair/spin/exchange:K1_mech");
+  memory->create(K2,n+1,n+1,"pair/spin/exchange:K2");
+  memory->create(K3,n+1,n+1,"pair/spin/exchange:K3");
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i,j;
+  for (i = 1; i <= atom->ntypes; i++) {
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {
+        fwrite(&J1_mag[i][j],sizeof(double),1,fp);
+        fwrite(&J1_mech[i][j],sizeof(double),1,fp);
+        fwrite(&J2[i][j],sizeof(double),1,fp);
+        fwrite(&J3[i][j],sizeof(double),1,fp);
+        fwrite(&K1_mag[i][j],sizeof(double),1,fp);
+        fwrite(&K1_mech[i][j],sizeof(double),1,fp);
+        fwrite(&K2[i][j],sizeof(double),1,fp);
+        fwrite(&K3[i][j],sizeof(double),1,fp);
+        fwrite(&cut_spin_exchange[i][j],sizeof(double),1,fp);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+
+  allocate();
+
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++) {
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,NULL,error);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,NULL,error);
+        }
+        MPI_Bcast(&J1_mag[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&J1_mech[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&J2[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&J3[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&K1_mag[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&K1_mech[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&K2[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&K3[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&cut_spin_exchange[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_spin_exchange_global,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error);
+    utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error);
+    utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error);
+  }
+  MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h
new file mode 100644
index 0000000000..6fb9a7a94c
--- /dev/null
+++ b/src/SPIN/pair_spin_exchange_biquadratic.h
@@ -0,0 +1,85 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(spin/exchange/biquadratic,PairSpinExchangeBiquadratic)
+
+#else
+
+#ifndef LMP_PAIR_SPIN_EXCHANGE_BIQUADRATIC_H
+#define LMP_PAIR_SPIN_EXCHANGE_BIQUADRATIC_H
+
+#include "pair_spin.h"
+
+namespace LAMMPS_NS {
+
+class PairSpinExchangeBiquadratic : public PairSpin {
+ public:
+  PairSpinExchangeBiquadratic(LAMMPS *lmp) : PairSpin(lmp) {}
+  virtual ~PairSpinExchangeBiquadratic();
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void *extract(const char *, int &);
+
+  void compute(int, int);
+  void compute_single_pair(int, double *);
+
+  void compute_exchange(int, int, double, double *, double *, double *);
+  void compute_exchange_mech(int, int, double, double *, double *, double *, double *);
+  double compute_energy(int , int , double , double *, double *);
+
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+
+  double cut_spin_exchange_global;      // global exchange cutoff distance
+
+ protected:
+  double **J1_mag;                      // j1/hbar, used for the spin term fmi
+  double **J1_mech;                     // j1 as given, used for force and energy
+  double **J2, **J3;                    // J2 weights the (r/J3)^2 term, J3 scales r
+  double **K1_mag;                      // k1/hbar, used for the spin term fmi
+  double **K1_mech;                     // k1 as given, used for force and energy
+  double **K2, **K3;                    // K2 weights the (r/K3)^2 term, K3 scales r
+  double **cut_spin_exchange;           // cutoff distance exchange
+
+  void allocate();
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Incorrect args in pair_style command
+
+Self-explanatory.
+
+E: Spin simulations require metal unit style
+
+Self-explanatory.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+E: Pair spin requires atom attribute spin
+
+The atom style defined does not have these attributes.
+
+*/

From e941670f2c7ae02a22ce1617a01fa967dbeaff56 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 13 Jul 2020 14:43:14 -0600
Subject: [PATCH 02/44] Commit modif biquad

---
 doc/src/fix_precession_spin.rst    |  2 +-
 src/SPIN/pair_spin_dipole_cut.cpp  |  5 +++--
 src/SPIN/pair_spin_dipole_long.cpp | 12 ++++++++----
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/doc/src/fix_precession_spin.rst b/doc/src/fix_precession_spin.rst
index 783963af72..043c5cb200 100644
--- a/doc/src/fix_precession_spin.rst
+++ b/doc/src/fix_precession_spin.rst
@@ -62,7 +62,7 @@ with:
 
 The field value in Tesla is multiplied by the gyromagnetic
 ratio, :math:`g \cdot \mu_B/\hbar`, converting it into a precession frequency in
-rad.THz (in metal units and with :math:`\mu_B = 5.788 eV/T`).
+rad.THz (in metal units and with :math:`\mu_B = 5.788\cdot 10^{-5}` eV/T).
 
 As a comparison, the figure below displays the simulation of a
 single spin (of norm :math:`\mu_i = 1.0`) submitted to an external
diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp
index cdae3c0bab..e18c24bcc0 100644
--- a/src/SPIN/pair_spin_dipole_cut.cpp
+++ b/src/SPIN/pair_spin_dipole_cut.cpp
@@ -48,9 +48,10 @@ PairSpinDipoleCut::PairSpinDipoleCut(LAMMPS *lmp) : PairSpin(lmp)
 
   hbar = force->hplanck/MY_2PI;                       // eV/(rad.THz)
   mub = 9.274e-4;                             // in A.Ang^2
-  mu_0 = 785.15;                              // in eV/Ang/A^2
+  // mu_0 = 785.15;                              // in eV/Ang/A^2
+  mu_0 = 784.15;                              // in eV/Ang/A^2
   mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI);   // in eV.Ang^3
-  //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI);   // in eV
+  // mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI);   // in eV
   mub2mu0hbinv = mub2mu0 / hbar;              // in rad.THz
 }
 
diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp
index aeb916cfae..5ac3b276d2 100644
--- a/src/SPIN/pair_spin_dipole_long.cpp
+++ b/src/SPIN/pair_spin_dipole_long.cpp
@@ -52,7 +52,7 @@ PairSpinDipoleLong::PairSpinDipoleLong(LAMMPS *lmp) : PairSpin(lmp)
 
   hbar = force->hplanck/MY_2PI;                 // eV/(rad.THz)
   mub = 9.274e-4;                               // in A.Ang^2
-  mu_0 = 785.15;                                // in eV/Ang/A^2
+  mu_0 = 784.15;                                // in eV/Ang/A^2
   mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI);     // in eV.Ang^3
   //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI);   // in eV
   mub2mu0hbinv = mub2mu0 / hbar;                // in rad.THz
@@ -136,10 +136,11 @@ void PairSpinDipoleLong::init_style()
 
   // insure use of KSpace long-range solver, set g_ewald
 
-  if (force->kspace == NULL)
-    error->all(FLERR,"Pair style requires a KSpace style");
+  // if (force->kspace == NULL)
+  //   error->all(FLERR,"Pair style requires a KSpace style");
 
-  g_ewald = force->kspace->g_ewald;
+  // g_ewald = force->kspace->g_ewald;
+  g_ewald = 1.0;
 }
 
 /* ----------------------------------------------------------------------
@@ -220,6 +221,9 @@ void PairSpinDipoleLong::compute(int eflag, int vflag)
     memory->grow(emag,nlocal_max,"pair/spin:emag");
   }
 
+
+  printf("test gewald %g \n",g_ewald);
+
   pre1 = 2.0 * g_ewald / MY_PIS;
   pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS;
   pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS;

From 7054c82b679031845592e28b400c4b1a5d2c890f Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 24 Aug 2020 09:23:31 -0600
Subject: [PATCH 03/44] added BS function to pair/spin/biquadratic

---
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 102 ++++++++++++++------
 1 file changed, 70 insertions(+), 32 deletions(-)

diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index a7f64690af..20cea77396 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -375,15 +375,24 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
   rk = ra/K3[itype][jtype];
   r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
   ir3k = 1.0/(rk*rk*rk);
+ 
+  // BS model
+  Jex = 4.0*J1_mag[itype][jtype]*r2j;
+  Jex *= (1.0-J2[itype][jtype]*r2j);
+  Jex *= exp(-r2j);
+
+  Kex = 4.0*K1_mag[itype][jtype]*r2k;
+  Kex *= (1.0-K2[itype][jtype]*r2k);
+  Kex *= exp(-r2k);
   
   // modified Yukawa
-  Jex = (1.0-J2[itype][jtype]*r2j);
-  Jex *= J1_mag[itype][jtype]*ir3j;
-  Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
-  
-  Kex = (1.0-K2[itype][jtype]*r2k);
-  Kex *= K1_mag[itype][jtype]*ir3k;
-  Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+  // Jex = (1.0-J2[itype][jtype]*r2j);
+  // Jex *= J1_mag[itype][jtype]*ir3j;
+  // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+  // 
+  // Kex = (1.0-K2[itype][jtype]*r2k);
+  // Kex *= K1_mag[itype][jtype]*ir3k;
+  // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
  
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
@@ -402,28 +411,48 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq
   int *type = atom->type;
   int itype,jtype;
   double Jex,Jex_mech,Kex,Kex_mech,ra,sdots;
-  double rj,rk,r2j,r2k,ir3j,ir3k;
+  // double rj,rk,r2j,r2k,ir3j,ir3k;
+  double rja,rka,rjr,rkr,iJ3,iK3;
   itype = type[i];
   jtype = type[j];
 
-  ra = sqrt(rsq);
-  rj = ra/J3[itype][jtype];
-  r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
-  ir3j = 1.0/(rj*rj*rj);
-  rk = ra/K3[itype][jtype];
-  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
-  ir3k = 1.0/(rk*rk*rk);
+  // ra = sqrt(rsq);
+  // rj = ra/J3[itype][jtype];
+  // r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
+  // ir3j = 1.0/(rj*rj*rj);
+  // rk = ra/K3[itype][jtype];
+  // r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
+  // ir3k = 1.0/(rk*rk*rk);
   
-  // modified Yukawa
-  Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]);
-  Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j);
-  Jex_mech *= -J1_mech[itype][jtype]*ir3j;
-  Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+  Jex = J1_mech[itype][jtype];
+  iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]);
+  Kex = K1_mech[itype][jtype];
+  iK3 = 1.0/(K3[itype][jtype]*K3[itype][jtype]);
+  
+  rja = rsq*iJ3;
+  rjr = sqrt(rsq)*iJ3;
+  rka = rsq*iK3;
+  rkr = sqrt(rsq)*iK3;
+ 
+  // BS model
+  Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja);
+  Jex_mech *= 8.0*Jex*rjr*exp(-rja);
+  Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  
+  Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka);
+  Kex_mech *= 8.0*Kex*rkr*exp(-rka);
+  Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]);
-  Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k);
-  Kex_mech *= -K1_mech[itype][jtype]*ir3k;
-  Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+  // modified Yukawa
+  // Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]);
+  // Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j);
+  // Jex_mech *= -J1_mech[itype][jtype]*ir3j;
+  // Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+
+  // Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]);
+  // Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k);
+  // Kex_mech *= -K1_mech[itype][jtype]*ir3k;
+  // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
   
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
@@ -454,15 +483,24 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   rk = ra/K3[itype][jtype];
   r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
   ir3k = 1.0/(rk*rk*rk);
-  
+ 
+  // BS model 
+  Jex = 4.0*J1_mech[itype][jtype]*r2j;
+  Jex *= (1.0-J2[itype][jtype]*r2j);
+  Jex *= exp(-r2j);
+
+  Kex = 4.0*K1_mech[itype][jtype]*r2k;
+  Kex *= (1.0-K2[itype][jtype]*r2k);
+  Kex *= exp(-r2k);
+
   // modified Yukawa
-  Jex = (1.0-J2[itype][jtype]*r2j);
-  Jex *= J1_mech[itype][jtype]*ir3j;
-  Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
-  
-  Kex = (1.0-K2[itype][jtype]*r2k);
-  Kex *= K1_mech[itype][jtype]*ir3k;
-  Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
+  // Jex = (1.0-J2[itype][jtype]*r2j);
+  // Jex *= J1_mech[itype][jtype]*ir3j;
+  // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
+  // 
+  // Kex = (1.0-K2[itype][jtype]*r2k);
+  // Kex *= K1_mech[itype][jtype]*ir3k;
+  // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
 

From 901fe9d3aa494f1ec92867e4fbc1a26e18222c99 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Tue, 15 Sep 2020 18:22:11 -0600
Subject: [PATCH 04/44] modification of pair spin exchange/biquadratic, to
 offset ground state spin pressure

---
 src/SPIN/compute_spin.cpp                   | 37 +++++++++++++++++++--
 src/SPIN/pair_spin_exchange_biquadratic.cpp |  9 +++--
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp
index 94eff27f53..ca3c40e11a 100644
--- a/src/SPIN/compute_spin.cpp
+++ b/src/SPIN/compute_spin.cpp
@@ -46,6 +46,7 @@ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) :
   if ((narg != 3) && (narg != 4)) error->all(FLERR,"Illegal compute compute/spin command");
 
   vector_flag = 1;
+  // size_vector = 7;
   size_vector = 6;
   extvector = 0;
 
@@ -148,15 +149,19 @@ void ComputeSpin::compute_vector()
   int i;
   int countsp, countsptot;
   double mag[4], magtot[4];
+  double m2, m2tot;
+  double m4, m4tot;
   double magenergy, magenergytot;
   double tempnum, tempnumtot;
   double tempdenom, tempdenomtot;
-  double spintemperature;
+  double spintemperature,binder;
 
   invoked_vector = update->ntimestep;
 
   countsp = countsptot = 0.0;
   mag[0] = mag[1] = mag[2] = mag[3] = 0.0;
+  // m2 = m2tot = 0.0;
+  // m4 = m4tot = 0.0;
   magtot[0] = magtot[1] = magtot[2] = magtot[3] = 0.0;
   magenergy = magenergytot = 0.0;
   tempnum = tempnumtot = 0.0;
@@ -176,10 +181,25 @@ void ComputeSpin::compute_vector()
   for (i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       if (atom->sp_flag) {
+        
+        // compute first moment
+
         mag[0] += sp[i][0];
         mag[1] += sp[i][1];
         mag[2] += sp[i][2];
 
+        // compute second moment
+        
+        // m2 += sp[i][0]*sp[i][0];
+        // m2 += sp[i][1]*sp[i][1];
+        // m2 += sp[i][2]*sp[i][2];
+
+        // compute fourth moment
+        
+        // m4 += sp[i][0]*sp[i][0]*sp[i][0]*sp[i][0];
+        // m4 += sp[i][1]*sp[i][1]*sp[i][1]*sp[i][1];
+        // m4 += sp[i][2]*sp[i][2]*sp[i][2]*sp[i][2];
+
         // update magnetic precession energies
 
         if (precession_spin_flag) {
@@ -206,26 +226,39 @@ void ComputeSpin::compute_vector()
   }
 
   MPI_Allreduce(mag,magtot,4,MPI_DOUBLE,MPI_SUM,world);
+  // MPI_Allreduce(&m2,&m2tot,1,MPI_DOUBLE,MPI_SUM,world);
+  // MPI_Allreduce(&m4,&m4tot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&magenergy,&magenergytot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&tempnum,&tempnumtot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&tempdenom,&tempdenomtot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&countsp,&countsptot,1,MPI_INT,MPI_SUM,world);
 
+  // compute average magnetization
+
   double scale = 1.0/countsptot;
   magtot[0] *= scale;
   magtot[1] *= scale;
   magtot[2] *= scale;
   magtot[3] = sqrt((magtot[0]*magtot[0])+(magtot[1]*magtot[1])+(magtot[2]*magtot[2]));
+  
+  // compute spin temperature
+  
   spintemperature = hbar*tempnumtot;
   spintemperature /= (2.0*kb*tempdenomtot);
 
+  // compute Binder cumulant
+
+  // m2tot *= scale;
+  // m4tot *= scale;
+  // binder = 1.0 - m4tot/(3.0*m2tot*m2tot);
+
   vector[0] = magtot[0];
   vector[1] = magtot[1];
   vector[2] = magtot[2];
   vector[3] = magtot[3];
   vector[4] = magenergytot;
   vector[5] = spintemperature;
-
+  // vector[6] = binder;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 20cea77396..812ccf40ab 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -454,11 +454,15 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq
   // Kex_mech *= -K1_mech[itype][jtype]*ir3k;
   // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
   
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0);
 
   fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
   fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
   fi[2] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
+  // fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
+  // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
+  // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
 }
 
 /* ----------------------------------------------------------------------
@@ -502,7 +506,8 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   // Kex *= K1_mech[itype][jtype]*ir3k;
   // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
 
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
+  // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0);  
 
   energy = 0.5*(Jex*sdots + Kex*sdots*sdots);
   return energy;

From 7d5109454f02bde06f625065e18f4506701446ac Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Tue, 15 Sep 2020 20:16:48 -0600
Subject: [PATCH 05/44] correcting small issue with offset of biquadratic
 exchange

---
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 812ccf40ab..61b3df70ce 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -455,11 +455,11 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq
   // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
   
   // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0);
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
-  fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
-  fi[2] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
+  fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
+  fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
+  fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
   // fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
   // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
   // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
@@ -507,9 +507,9 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
 
   // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0);  
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
 
-  energy = 0.5*(Jex*sdots + Kex*sdots*sdots);
+  energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0));
   return energy;
 }
 

From 9aba7b00505e3d33771d308b4253f310cad9297e Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 28 Sep 2020 15:42:26 -0600
Subject: [PATCH 06/44] adding a kokkos/spin atom style

---
 src/KOKKOS/kokkos_type.h                    |  60 +++++++++++
 src/SPIN/compute_spin.cpp                   |  24 -----
 src/SPIN/pair_spin_exchange.cpp             |  41 ++++----
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 107 ++++++--------------
 src/SPIN/pair_spin_neel.cpp                 |   8 +-
 5 files changed, 117 insertions(+), 123 deletions(-)

diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index c8fccaf409..a3ebe4f030 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -714,6 +714,39 @@ typedef tdual_virial_array::t_dev_um t_virial_array_um;
 typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
 typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
 
+// Spin Types: DualView typedefs (and device-view aliases) for the sp, fm and fm_long arrays
+
+//2d X_FLOAT array n*4 (tdual_sp_array)
+#ifdef LMP_KOKKOS_NO_LEGACY
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_sp_array;
+#else
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_sp_array;
+#endif
+typedef tdual_sp_array::t_dev t_sp_array;
+typedef tdual_sp_array::t_dev_const t_sp_array_const;
+typedef tdual_sp_array::t_dev_um t_sp_array_um;
+typedef tdual_sp_array::t_dev_const_um t_sp_array_const_um;
+typedef tdual_sp_array::t_dev_const_randomread t_sp_array_randomread;
+
+//2d F_FLOAT array n*3 (tdual_fm_array)
+
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_array;
+typedef tdual_fm_array::t_dev t_fm_array;
+typedef tdual_fm_array::t_dev_const t_fm_array_const;
+typedef tdual_fm_array::t_dev_um t_fm_array_um;
+typedef tdual_fm_array::t_dev_const_um t_fm_array_const_um;
+typedef tdual_fm_array::t_dev_const_randomread t_fm_array_randomread;
+
+//2d F_FLOAT array n*3 (tdual_fm_long_array)
+
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_long_array;
+typedef tdual_fm_long_array::t_dev t_fm_long_array;
+typedef tdual_fm_long_array::t_dev_const t_fm_long_array_const;
+typedef tdual_fm_long_array::t_dev_um t_fm_long_array_um;
+typedef tdual_fm_long_array::t_dev_const_um t_fm_long_array_const_um;
+typedef tdual_fm_long_array::t_dev_const_randomread t_fm_long_array_randomread;
+
+
 //Energy Types
 //1d E_FLOAT array n
 
@@ -950,6 +983,33 @@ typedef tdual_virial_array::t_host_um t_virial_array_um;
 typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
 typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
 
+// Spin types: host-view aliases of the sp, fm and fm_long DualViews
+
+//2d X_FLOAT array n*4 (same extent as the device-side tdual_sp_array; host code reads sp index 3)
+typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_sp_array;
+typedef tdual_sp_array::t_host t_sp_array;
+typedef tdual_sp_array::t_host_const t_sp_array_const;
+typedef tdual_sp_array::t_host_um t_sp_array_um;
+typedef tdual_sp_array::t_host_const_um t_sp_array_const_um;
+typedef tdual_sp_array::t_host_const_randomread t_sp_array_randomread;
+
+//2d F_FLOAT array n*3
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_array;
+//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
+typedef tdual_fm_array::t_host t_fm_array;
+typedef tdual_fm_array::t_host_const t_fm_array_const;
+typedef tdual_fm_array::t_host_um t_fm_array_um;
+typedef tdual_fm_array::t_host_const_um t_fm_array_const_um;
+typedef tdual_fm_array::t_host_const_randomread t_fm_array_randomread;
+
+//2d F_FLOAT array n*3
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_long_array;
+//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
+typedef tdual_fm_long_array::t_host t_fm_long_array;
+typedef tdual_fm_long_array::t_host_const t_fm_long_array_const;
+typedef tdual_fm_long_array::t_host_um t_fm_long_array_um;
+typedef tdual_fm_long_array::t_host_const_um t_fm_long_array_const_um;
+typedef tdual_fm_long_array::t_host_const_randomread t_fm_long_array_randomread;
 
 
 //Energy Types
diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp
index ca3c40e11a..5edfb04645 100644
--- a/src/SPIN/compute_spin.cpp
+++ b/src/SPIN/compute_spin.cpp
@@ -46,7 +46,6 @@ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) :
   if ((narg != 3) && (narg != 4)) error->all(FLERR,"Illegal compute compute/spin command");
 
   vector_flag = 1;
-  // size_vector = 7;
   size_vector = 6;
   extvector = 0;
 
@@ -160,8 +159,6 @@ void ComputeSpin::compute_vector()
 
   countsp = countsptot = 0.0;
   mag[0] = mag[1] = mag[2] = mag[3] = 0.0;
-  // m2 = m2tot = 0.0;
-  // m4 = m4tot = 0.0;
   magtot[0] = magtot[1] = magtot[2] = magtot[3] = 0.0;
   magenergy = magenergytot = 0.0;
   tempnum = tempnumtot = 0.0;
@@ -188,18 +185,6 @@ void ComputeSpin::compute_vector()
         mag[1] += sp[i][1];
         mag[2] += sp[i][2];
 
-        // compute second moment
-        
-        // m2 += sp[i][0]*sp[i][0];
-        // m2 += sp[i][1]*sp[i][1];
-        // m2 += sp[i][2]*sp[i][2];
-
-        // compute fourth moment
-        
-        // m4 += sp[i][0]*sp[i][0]*sp[i][0]*sp[i][0];
-        // m4 += sp[i][1]*sp[i][1]*sp[i][1]*sp[i][1];
-        // m4 += sp[i][2]*sp[i][2]*sp[i][2]*sp[i][2];
-
         // update magnetic precession energies
 
         if (precession_spin_flag) {
@@ -226,8 +211,6 @@ void ComputeSpin::compute_vector()
   }
 
   MPI_Allreduce(mag,magtot,4,MPI_DOUBLE,MPI_SUM,world);
-  // MPI_Allreduce(&m2,&m2tot,1,MPI_DOUBLE,MPI_SUM,world);
-  // MPI_Allreduce(&m4,&m4tot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&magenergy,&magenergytot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&tempnum,&tempnumtot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(&tempdenom,&tempdenomtot,1,MPI_DOUBLE,MPI_SUM,world);
@@ -246,19 +229,12 @@ void ComputeSpin::compute_vector()
   spintemperature = hbar*tempnumtot;
   spintemperature /= (2.0*kb*tempdenomtot);
 
-  // compute Binder cumulant
-
-  // m2tot *= scale;
-  // m4tot *= scale;
-  // binder = 1.0 - m4tot/(3.0*m2tot*m2tot);
-
   vector[0] = magtot[0];
   vector[1] = magtot[1];
   vector[2] = magtot[2];
   vector[3] = magtot[3];
   vector[4] = magenergytot;
   vector[5] = spintemperature;
-  // vector[6] = binder;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp
index b23f4fa0cb..611230c73e 100644
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@@ -240,28 +240,26 @@ void PairSpinExchange::compute(int eflag, int vflag)
           evdwl *= 0.5*hbar;
           emag[i] += evdwl;
         } else evdwl = 0.0;
+
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
+        }
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
+
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
       }
-
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      // if (eflag) {
-      //   evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-      //   evdwl *= 0.5*hbar;
-      //   emag[i] += evdwl;
-      // } else evdwl = 0.0;
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
-
 }
 
 /* ----------------------------------------------------------------------
@@ -389,9 +387,12 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
   Jex_mech *= 8.0*Jex*rr*exp(-ra);
   Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  fi[0] -= Jex_mech*eij[0];
-  fi[1] -= Jex_mech*eij[1];
-  fi[2] -= Jex_mech*eij[2];
+  fi[0] -= 0.5*Jex_mech*eij[0];
+  fi[1] -= 0.5*Jex_mech*eij[1];
+  fi[2] -= 0.5*Jex_mech*eij[2];
+  // fi[0] -= Jex_mech*eij[0];
+  // fi[1] -= Jex_mech*eij[1];
+  // fi[2] -= Jex_mech*eij[2];
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 61b3df70ce..cf351e6539 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -246,6 +246,7 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_exchange(i,j,rsq,fmi,spi,spj);
+        
         if (lattice_flag)
           compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
       
@@ -253,22 +254,26 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag)
           evdwl -= compute_energy(i,j,rsq,spi,spj);
           emag[i] += evdwl;
         } else evdwl = 0.0;
+
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
+        }
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
+
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
       }
-
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
-
 }
 
 /* ----------------------------------------------------------------------
@@ -363,20 +368,13 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
 {
   int *type = atom->type;
   int itype,jtype;
-  double Jex,Kex,ra,sdots;
-  double rj,rk,r2j,r2k,ir3j,ir3k;
+  double Jex,Kex,r2j,r2k,sdots;
   itype = type[i];
   jtype = type[j];
 
-  ra = sqrt(rsq);
-  rj = ra/J3[itype][jtype];
   r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
-  ir3j = 1.0/(rj*rj*rj);
-  rk = ra/K3[itype][jtype];
-  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
-  ir3k = 1.0/(rk*rk*rk);
+  r2k = rsq/K3[itype][jtype]/K3[itype][jtype];  // Kex is scaled by K3 (as in compute_energy), not J3
  
-  // BS model
   Jex = 4.0*J1_mag[itype][jtype]*r2j;
   Jex *= (1.0-J2[itype][jtype]*r2j);
   Jex *= exp(-r2j);
@@ -385,45 +383,27 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
   Kex *= (1.0-K2[itype][jtype]*r2k);
   Kex *= exp(-r2k);
   
-  // modified Yukawa
-  // Jex = (1.0-J2[itype][jtype]*r2j);
-  // Jex *= J1_mag[itype][jtype]*ir3j;
-  // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
-  // 
-  // Kex = (1.0-K2[itype][jtype]*r2k);
-  // Kex *= K1_mag[itype][jtype]*ir3k;
-  // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
- 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  fmi[0] += Jex*spj[0] + 2.0*Kex*spj[0]*sdots;
-  fmi[1] += Jex*spj[1] + 2.0*Kex*spj[1]*sdots;
-  fmi[2] += Jex*spj[2] + 2.0*Kex*spj[2]*sdots;
+  fmi[0] += (Jex*spj[0] + 2.0*Kex*spj[0]*sdots);
+  fmi[1] += (Jex*spj[1] + 2.0*Kex*spj[1]*sdots);
+  fmi[2] += (Jex*spj[2] + 2.0*Kex*spj[2]*sdots);
 }
 
 /* ----------------------------------------------------------------------
    compute the mechanical force due to the exchange interaction between atom i and atom j
 ------------------------------------------------------------------------- */
 
-void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq, 
-    double eij[3], double fi[3],  double spi[3], double spj[3])
+void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, 
+    double rsq, double eij[3], double fi[3],  double spi[3], double spj[3])
 {
   int *type = atom->type;
   int itype,jtype;
   double Jex,Jex_mech,Kex,Kex_mech,ra,sdots;
-  // double rj,rk,r2j,r2k,ir3j,ir3k;
   double rja,rka,rjr,rkr,iJ3,iK3;
   itype = type[i];
   jtype = type[j];
 
-  // ra = sqrt(rsq);
-  // rj = ra/J3[itype][jtype];
-  // r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
-  // ir3j = 1.0/(rj*rj*rj);
-  // rk = ra/K3[itype][jtype];
-  // r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
-  // ir3k = 1.0/(rk*rk*rk);
-  
   Jex = J1_mech[itype][jtype];
   iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]);
   Kex = K1_mech[itype][jtype];
@@ -434,35 +414,22 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq
   rka = rsq*iK3;
   rkr = sqrt(rsq)*iK3;
  
-  // BS model
   Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja);
   Jex_mech *= 8.0*Jex*rjr*exp(-rja);
-  Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  // Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
   
   Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka);
   Kex_mech *= 8.0*Kex*rkr*exp(-rka);
-  Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  // Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  // modified Yukawa
-  // Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]);
-  // Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j);
-  // Jex_mech *= -J1_mech[itype][jtype]*ir3j;
-  // Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
-
-  // Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]);
-  // Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k);
-  // Kex_mech *= -K1_mech[itype][jtype]*ir3k;
-  // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
-  
-  // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
-  fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
-  fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
-  // fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
-  // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
-  // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
+  fi[0] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
+  fi[1] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
+  fi[2] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
+  // fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
+  // fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
+  // fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
 }
 
 /* ----------------------------------------------------------------------
@@ -488,7 +455,6 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
   ir3k = 1.0/(rk*rk*rk);
  
-  // BS model 
   Jex = 4.0*J1_mech[itype][jtype]*r2j;
   Jex *= (1.0-J2[itype][jtype]*r2j);
   Jex *= exp(-r2j);
@@ -497,19 +463,10 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   Kex *= (1.0-K2[itype][jtype]*r2k);
   Kex *= exp(-r2k);
 
-  // modified Yukawa
-  // Jex = (1.0-J2[itype][jtype]*r2j);
-  // Jex *= J1_mech[itype][jtype]*ir3j;
-  // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]);
-  // 
-  // Kex = (1.0-K2[itype][jtype]*r2k);
-  // Kex *= K1_mech[itype][jtype]*ir3k;
-  // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]);
-
-  // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
 
   energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0));
+  // energy = 0.5*(Jex*(sdots) + Kex*(sdots*sdots-1.0));
   return energy;
 }
 
diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp
index 4fd8ecc215..fc7cb6ab9a 100644
--- a/src/SPIN/pair_spin_neel.cpp
+++ b/src/SPIN/pair_spin_neel.cpp
@@ -262,8 +262,8 @@ void PairSpinNeel::compute(int eflag, int vflag)
       fm[i][2] += fmi[2];
 
       if (eflag) {
-        evdwl = compute_neel_energy(i,j,rsq,eij,spi,spj);
-        evdwl *= 0.5*hbar;
+        evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj);
+        // evdwl *= 0.5*hbar;
         emag[i] += evdwl;
       } else evdwl = 0.0;
 
@@ -588,12 +588,12 @@ double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3]
   // compute Neel's functions
 
   ra = rsq/g3[itype][jtype]/g3[itype][jtype];
-  gr = 4.0*g1[itype][jtype]*ra;
+  gr = 4.0*g1_mech[itype][jtype]*ra;
   gr *= (1.0-g2[itype][jtype]*ra);
   gr *= exp(-ra);
 
   ra = rsq/q3[itype][jtype]/q3[itype][jtype];
-  qr = 4.0*q1[itype][jtype]*ra;
+  qr = 4.0*q1_mech[itype][jtype]*ra;
   qr *= (1.0-q2[itype][jtype]*ra);
   qr *= exp(-ra);
 

From f0729551ae3798edccd44521cbf015e3d5d19fb7 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 28 Sep 2020 16:54:10 -0600
Subject: [PATCH 07/44] adding for good new kokkos spin style

---
 src/KOKKOS/atom_vec_spin_kokkos.cpp | 1297 +++++++++++++++++++++++++++
 src/KOKKOS/atom_vec_spin_kokkos.h   |  132 +++
 2 files changed, 1429 insertions(+)
 create mode 100644 src/KOKKOS/atom_vec_spin_kokkos.cpp
 create mode 100644 src/KOKKOS/atom_vec_spin_kokkos.h

diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
new file mode 100644
index 0000000000..8a7dd3317c
--- /dev/null
+++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp
@@ -0,0 +1,1297 @@
+/* ----------------------------------------------------------------------
+
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+------------------------------------------------------------------------- */
+
+/* ------------------------------------------------------------------------
+   Contributing authors: Julien Tranchida (SNL)
+                         Aidan Thompson (SNL)
+
+   Please cite the related publication:
+   Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018).
+   Massively parallel symplectic algorithm for coupled magnetic spin dynamics
+   and molecular dynamics. Journal of Computational Physics.
+------------------------------------------------------------------------- */
+
+#include "atom_vec_spin_kokkos.h"
+#include <cmath>
+#include <cstring>
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "error.h"
+#include "fix.h"
+#include "memory_kokkos.h"
+#include "modify.h"
+#include "utils.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{  // set the atom-style flags and per-atom size_* values, then cache the Kokkos atom/comm pointers
+  molecular = 0;
+  mass_type = 1;
+  forceclearflag = 1;
+
+  comm_x_only = comm_f_only = 0;
+  size_forward = 7;  // presumably x[3] + sp[4], the 7 values AtomVecSpinKokkos_PackComm packs per atom
+  size_reverse = 9;
+  size_border = 10;
+  size_velocity = 3;
+  size_data_atom = 9;
+  size_data_vel = 4;
+  xcol_data = 4;
+
+  atom->sp_flag = 1;  // tested by ComputeSpin::compute_vector before it reads sp
+  
+  k_count = DAT::tdual_int_1d("atom::k_count",1);  // one-element int array labelled "atom::k_count"
+  atomKK = (AtomKokkos *) atom;  // Kokkos-typed casts of the base-class atom and comm pointers
+  commKK = (CommKokkos *) comm;
+}
+
+/* ----------------------------------------------------------------------
+   grow the tag/type/mask/image, x/v/f and sp/fm/fm_long atom arrays to nmax
+   n = 0 grows nmax by a chunk of MAX(DELTA, 1% of nmax)
+   n > 0 sets nmax to n; every extra_grow fix is then asked to grow to nmax
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::grow(int n)
+{
+  int step = MAX(DELTA,nmax*0.01);
+  if (n == 0) nmax += step;
+  else nmax = n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)  // nmax < 0 presumably catches int overflow of nmax += step
+    error->one(FLERR,"Per-processor system is too big");
+
+  atomKK->sync(Device,ALL_MASK);  // NOTE(review): presumably brings device data up to date before resizing - confirm
+  atomKK->modified(Device,ALL_MASK);
+
+  memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+   
+  // allocating mech. quantities
+
+  memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x");
+  memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v");
+  memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f");
+  
+  // allocating mag. quantities
+
+  memoryKK->grow_kokkos(atomKK->k_sp,atomKK->sp,nmax,"atom:sp");
+  memoryKK->grow_kokkos(atomKK->k_fm,atomKK->fm,nmax,"atom:fm");
+  memoryKK->grow_kokkos(atomKK->k_fm_long,atomKK->fm_long,nmax,"atom:fm_long");
+
+  if (atom->nextra_grow)  // let each fix listed in atom->extra_grow resize its own per-atom data
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs and their d_* / h_* views from the atomKK arrays
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::grow_reset()
+{
+  tag = atomKK->tag;
+  d_tag = atomKK->k_tag.d_view;
+  h_tag = atomKK->k_tag.h_view;
+
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+  
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+  
+  sp = atomKK->sp; 
+  d_sp = atomKK->k_sp.d_view;
+  h_sp = atomKK->k_sp.h_view;
+  fm = atom->fm; // NOTE(review): read through atom rather than atomKK, unlike sp above
+  d_fm = atomKK->k_fm.d_view;
+  h_fm = atomKK->k_fm.h_view;
+  fm_long = atom->fm_long;  // NOTE(review): also read through atom rather than atomKK
+  d_fm_long = atomKK->k_fm_long.d_view;
+  h_fm_long = atomKK->k_fm_long.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info (tag, type, mask, image, x, v, sp) to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::copy(int i, int j, int delflag)
+{
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];
+  h_image[j] = h_image[i];
+  h_x(j,0) = h_x(i,0);
+  h_x(j,1) = h_x(i,1);
+  h_x(j,2) = h_x(i,2);
+  h_v(j,0) = h_v(i,0);
+  h_v(j,1) = h_v(i,1);
+  h_v(j,2) = h_v(i,2);
+
+  h_sp(j,0) = h_sp(i,0);
+  h_sp(j,1) = h_sp(i,1);
+  h_sp(j,2) = h_sp(i,2);
+  h_sp(j,3) = h_sp(i,3);
+
+  if (atom->nextra_grow)  // each fix listed in atom->extra_grow copies its own per-atom data; delflag is forwarded
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecSpinKokkos_PackComm {  // functor: packs x (PBC-shifted if PBC_FLAG) and sp of atom _list(_iswap,i) into row i of _buf
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_sp_array_randomread _sp;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  
+  AtomVecSpinKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_sp_array &sp,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_sp(sp.view<DeviceType>()),
+      _list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        // 7 values are packed per atom (x[0..2] + sp[0..3]): size the row count by 7, not 3
+        const size_t elements = 7;
+        const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+          _buf(i,3) = _sp(j,0);
+          _buf(i,4) = _sp(j,1);
+          _buf(i,5) = _sp(j,2);
+          _buf(i,6) = _sp(j,3);
+      } else {
+        if (TRICLINIC == 0) {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+          _buf(i,3) = _sp(j,0);
+          _buf(i,4) = _sp(j,1);
+          _buf(i,5) = _sp(j,2);
+          _buf(i,6) = _sp(j,3);
+        } else {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+          _buf(i,3) = _sp(j,0);
+          _buf(i,4) = _sp(j,1);
+          _buf(i,5) = _sp(j,2);
+          _buf(i,6) = _sp(j,3);
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecSpinKokkos_PackBorder {
+  typedef DeviceType device_type;
+  // functor: writes 10 values per border atom into row i of _buf
+  typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
+  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  const typename ArrayTypes<DeviceType>::t_int_1d _type;
+  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  const typename ArrayTypes<DeviceType>::t_sp_array_randomread _sp;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecSpinKokkos_PackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
+      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_x_array &x,
+      const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      const typename ArrayTypes<DeviceType>::t_int_1d &type,
+      const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      const typename ArrayTypes<DeviceType>::t_sp_array &sp,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+    _buf(buf),_list(list),_iswap(iswap),
+    _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),
+    _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    // row i of the buffer receives atom _list(_iswap,i)
+    const int src = _list(_iswap,i);
+
+    // coordinates: shifted by (_dx,_dy,_dz) only when PBC_FLAG is set
+    if (PBC_FLAG != 0) {
+      _buf(i,0) = _x(src,0) + _dx;
+      _buf(i,1) = _x(src,1) + _dy;
+      _buf(i,2) = _x(src,2) + _dz;
+    } else {
+      _buf(i,0) = _x(src,0);
+      _buf(i,1) = _x(src,1);
+      _buf(i,2) = _x(src,2);
+    }
+
+    // integer fields are stored through the .d member of d_ubuf
+    _buf(i,3) = d_ubuf(_tag(src)).d;
+    _buf(i,4) = d_ubuf(_type(src)).d;
+    _buf(i,5) = d_ubuf(_mask(src)).d;
+
+    // spin: all four sp components, identical in both PBC cases
+    _buf(i,6) = _sp(src,0);
+    _buf(i,7) = _sp(src,1);
+    _buf(i,8) = _sp(src,2);
+    _buf(i,9) = _sp(src,3);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+  // launch the PackBorder functor over n atoms on the requested space; returns n*size_border
+  if (pbc_flag == 0) {
+    dx = dy = dz = 0;
+    if (space == Host) {
+      AtomVecSpinKokkos_PackBorder<LMPHostType,0> packer(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    } else {
+      AtomVecSpinKokkos_PackBorder<LMPDeviceType,0> packer(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    }
+
+  } else {
+    if (domain->triclinic != 0) {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    } else {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (space == Host) {
+      AtomVecSpinKokkos_PackBorder<LMPHostType,1> packer(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    } else {
+      AtomVecSpinKokkos_PackBorder<LMPDeviceType,1> packer(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    }
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_border(int n, int *list, double *buf,
+                               int pbc_flag, int *pbc)
+{
+  // host-side packing, 10 values per listed atom: x (3), tag, type, mask, sp (4);
+  // fixes listed in atom->extra_border append their own data; returns values written
+  double dx,dy,dz;
+  int m = 0;
+  if (pbc_flag == 0) {
+    for (int k = 0; k < n; k++) {
+      const int ia = list[k];
+      buf[m++] = h_x(ia,0);
+      buf[m++] = h_x(ia,1);
+      buf[m++] = h_x(ia,2);
+      buf[m++] = ubuf(h_tag(ia)).d;
+      buf[m++] = ubuf(h_type(ia)).d;
+      buf[m++] = ubuf(h_mask(ia)).d;
+      buf[m++] = h_sp(ia,0);
+      buf[m++] = h_sp(ia,1);
+      buf[m++] = h_sp(ia,2);
+      buf[m++] = h_sp(ia,3);
+    }
+  } else {
+    if (domain->triclinic != 0) {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    } else {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (int k = 0; k < n; k++) {
+      const int ia = list[k];
+      buf[m++] = h_x(ia,0) + dx;
+      buf[m++] = h_x(ia,1) + dy;
+      buf[m++] = h_x(ia,2) + dz;
+      buf[m++] = ubuf(h_tag(ia)).d;
+      buf[m++] = ubuf(h_type(ia)).d;
+      buf[m++] = ubuf(h_mask(ia)).d;
+      buf[m++] = h_sp(ia,0);
+      buf[m++] = h_sp(ia,1);
+      buf[m++] = h_sp(ia,2);
+      buf[m++] = h_sp(ia,3);
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_border_vel(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+  // 13 values per listed atom: x (3), tag, type, mask, sp (4), v (3); returns values written
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_sp(j,0);
+      buf[m++] = h_sp(j,1);
+      buf[m++] = h_sp(j,2);
+      buf[m++] = h_sp(j,3);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_sp(j,0);
+        buf[m++] = h_sp(j,1);
+        buf[m++] = h_sp(j,2);
+        buf[m++] = h_sp(j,3);
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_sp(j,0);
+        buf[m++] = h_sp(j,1);
+        buf[m++] = h_sp(j,2);
+        buf[m++] = h_sp(j,3);
+        if (h_mask(j) & deform_groupbit) {  // test the packed atom j, not the list position i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  // fixes listed in atom->extra_border append their own per-atom border data
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  // hybrid variant: only the four sp components of each listed atom are packed;
+  // returns the number of values written (4 per atom)
+  int m = 0;
+  for (int k = 0; k < n; k++) {
+    const int ia = list[k];
+    buf[m++] = h_sp(ia,0);
+    buf[m++] = h_sp(ia,1);
+    buf[m++] = h_sp(ia,2);
+    buf[m++] = h_sp(ia,3);
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSpinKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+
+  const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  typename ArrayTypes<DeviceType>::t_int_1d _type;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_sp_array _sp;
+  int _first;
+
+  // functor: row i of _buf (10 values) is unpacked into atom i+_first
+  AtomVecSpinKokkos_UnpackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
+      typename ArrayTypes<DeviceType>::t_x_array &x,
+      typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      typename ArrayTypes<DeviceType>::t_int_1d &type,
+      typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      typename ArrayTypes<DeviceType>::t_sp_array &sp,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
+      _type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
+      _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
+      _sp(i+_first,0) = _buf(i,6);  // sp is 2-D: each component needs its own column index
+      _sp(i+_first,1) = _buf(i,7);
+      _sp(i+_first,2) = _buf(i,8);
+      _sp(i+_first,3) = _buf(i,9);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  // grow (with 100 atoms of slack) when the incoming range would reach nmax
+  const int needed = first+n;
+  if (needed >= nmax) grow(needed+100);
+  if (space != Host) {
+    AtomVecSpinKokkos_UnpackBorder<LMPDeviceType>
+      unpacker(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_sp,first);
+    Kokkos::parallel_for(n,unpacker);
+  } else {
+    AtomVecSpinKokkos_UnpackBorder<LMPHostType>
+      unpacker(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_sp,first);
+    Kokkos::parallel_for(n,unpacker);
+  }
+  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::unpack_border(int n, int first, double *buf)
+{
+  // host-side unpacking of 10 values per atom into atoms first..first+n-1,
+  // in the order x (3), tag, type, mask, sp (4)
+  int m = 0;
+  const int last = first + n;
+
+  for (int i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // NOTE(review): modified() is re-issued every iteration, i.e. also after a grow(0);
+    // presumably needed so the Host flag survives a reallocation - confirm before hoisting
+    atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_sp(i,0) = buf[m++];
+    h_sp(i,1) = buf[m++];
+    h_sp(i,2) = buf[m++];
+    h_sp(i,3) = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  // host-side unpacking of 13 values per atom into atoms first..first+n-1,
+  // in the order x (3), tag, type, mask, sp (4), v (3)
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_sp(i,0) = buf[m++];
+    h_sp(i,1) = buf[m++];
+    h_sp(i,2) = buf[m++];
+    h_sp(i,3) = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+  // read 4 sp values per atom; the braces keep all four writes inside the loop
+  for (int i = first; i < last; i++) {
+    h_sp(i,0) = buf[m++];
+    h_sp(i,1) = buf[m++];
+    h_sp(i,2) = buf[m++];
+    h_sp(i,3) = buf[m++];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSpinKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_sp_array_randomread _sp;
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_sp_array _spw;
+  // the *w members are writable views of the same atom arrays as the read-only ones above
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecSpinKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _sp(atom->k_sp.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _spw(atom->k_sp.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi){
+    const size_t nper = 15;  // values stored per exchanged atom
+    const int nrows = (buf.template view<DeviceType>().extent(0)*
+                       buf.template view<DeviceType>().extent(1))/nper;
+    // reinterpret the flat buffer as nrows x nper
+    buffer_view<DeviceType>(_buf,buf,nrows,nper);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    const int isend = _sendlist(mysend);
+    _buf(mysend,0) = 15;  // slot 0: length of this atom's record
+    _buf(mysend,1) = _x(isend,0);
+    _buf(mysend,2) = _x(isend,1);
+    _buf(mysend,3) = _x(isend,2);
+    _buf(mysend,4) = _v(isend,0);
+    _buf(mysend,5) = _v(isend,1);
+    _buf(mysend,6) = _v(isend,2);
+    _buf(mysend,7) = d_ubuf(_tag(isend)).d;
+    _buf(mysend,8) = d_ubuf(_type(isend)).d;
+    _buf(mysend,9) = d_ubuf(_mask(isend)).d;
+    _buf(mysend,10) = d_ubuf(_image(isend)).d;
+    _buf(mysend,11) = _sp(isend,0);
+    _buf(mysend,12) = _sp(isend,1);
+    _buf(mysend,13) = _sp(isend,2);
+    _buf(mysend,14) = _sp(isend,3);
+
+    // a non-negative copylist entry names the atom that overwrites the vacated slot
+    const int icopy = _copylist(mysend);
+    if (icopy < 0) return;
+    _xw(isend,0) = _x(icopy,0);
+    _xw(isend,1) = _x(icopy,1);
+    _xw(isend,2) = _x(icopy,2);
+    _vw(isend,0) = _v(icopy,0);
+    _vw(isend,1) = _v(icopy,1);
+    _vw(isend,2) = _v(icopy,2);
+    _tagw(isend) = _tag(icopy);
+    _typew(isend) = _type(icopy);
+    _maskw(isend) = _mask(icopy);
+    _imagew(isend) = _image(icopy);
+    _spw(isend,0) = _sp(icopy,0);
+    _spw(isend,1) = _sp(icopy,1);
+    _spw(isend,2) = _sp(icopy,2);
+    _spw(isend,3) = _sp(icopy,3);
+  }
+};
+  
+/* ---------------------------------------------------------------------- */
+  
+int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                              DAT::tdual_int_1d k_sendlist,
+                                              DAT::tdual_int_1d k_copylist,
+                                              ExecutionSpace space,int dim,
+                                              X_FLOAT lo,X_FLOAT hi )
+{
+  // 15 values per atom: enlarge k_buf first when nsend atoms would not fit
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/15) {
+    const int nrows = nsend*15/k_buf.view<LMPHostType>().extent(1)+1;
+    k_buf.resize(nrows,k_buf.view<LMPHostType>().extent(1));
+  }
+  if (space != Host) {
+    AtomVecSpinKokkos_PackExchangeFunctor<LMPDeviceType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+  } else {
+    AtomVecSpinKokkos_PackExchangeFunctor<LMPHostType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+  }
+  return nsend*15;
+}
+
+/* ---------------------------------------------------------------------- */
+  
+int AtomVecSpinKokkos::pack_exchange(int i, double *buf)
+{
+  int m = 1;                       // slot 0 is filled last with the record length
+  buf[m++] = h_x(i,0);             // position
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = h_v(i,0);             // velocity
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+  buf[m++] = ubuf(h_tag(i)).d;     // integer fields go through ubuf
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = h_sp(i,0);            // spin, four components
+  buf[m++] = h_sp(i,1);
+  buf[m++] = h_sp(i,2);
+  buf[m++] = h_sp(i,3);
+  // fixes listed in atom->extra_grow append their data for atom i
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSpinKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_sp_array _sp;
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecSpinKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _sp(atom->k_sp.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi){
+    const size_t nper = 15;  // values stored per exchanged atom
+    const int nrows = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/nper;
+    // reinterpret the flat buffer as nrows x nper
+    buffer_view<DeviceType>(_buf,buf,nrows,nper);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    // keep the record only if its coordinate along _dim lies in [_lo,_hi)
+    const X_FLOAT coord = _buf(myrecv,_dim+1);
+    if (!(coord >= _lo && coord < _hi)) return;
+    const int inew = Kokkos::atomic_fetch_add(&_nlocal(0),1);  // claim the next free slot
+    _x(inew,0) = _buf(myrecv,1);
+    _x(inew,1) = _buf(myrecv,2);
+    _x(inew,2) = _buf(myrecv,3);
+    _v(inew,0) = _buf(myrecv,4);
+    _v(inew,1) = _buf(myrecv,5);
+    _v(inew,2) = _buf(myrecv,6);
+    _tag(inew) = (tagint) d_ubuf(_buf(myrecv,7)).i;
+    _type(inew) = (int) d_ubuf(_buf(myrecv,8)).i;
+    _mask(inew) = (int) d_ubuf(_buf(myrecv,9)).i;
+    _image(inew) = (imageint) d_ubuf(_buf(myrecv,10)).i;
+    _sp(inew,0) = _buf(myrecv,11);
+    _sp(inew,1) = _buf(myrecv,12);
+    _sp(inew,2) = _buf(myrecv,13);
+    _sp(inew,3) = _buf(myrecv,14);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                ExecutionSpace space) {
+  // k_count(0) starts at nlocal and is incremented by the functor for every kept atom
+  k_count.h_view(0) = nlocal;
+  if (space == Host) {
+    AtomVecSpinKokkos_UnpackExchangeFunctor<LMPHostType> unpacker(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/15,unpacker);
+  } else {
+    // push the counter to the device, run there, then pull the result back
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecSpinKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      unpacker(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/15,unpacker);
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  }
+  // nrecv is a count of values; 15 values make one atom record
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::unpack_exchange(double *buf)
+{
+  const int inew = atom->nlocal;  // the incoming atom is stored at index nlocal
+  if (inew == nmax) grow(0);
+  atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | SP_MASK);
+  // buf[0] holds the record length written by pack_exchange(); data starts at index 1
+  int m = 1;
+  h_x(inew,0) = buf[m++];
+  h_x(inew,1) = buf[m++];
+  h_x(inew,2) = buf[m++];
+  h_v(inew,0) = buf[m++];
+  h_v(inew,1) = buf[m++];
+  h_v(inew,2) = buf[m++];
+  h_tag(inew) = (tagint) ubuf(buf[m++]).i;
+  h_type(inew) = (int) ubuf(buf[m++]).i;
+  h_mask(inew) = (int) ubuf(buf[m++]).i;
+  h_image(inew) = (imageint) ubuf(buf[m++]).i;
+  h_sp(inew,0) = buf[m++];
+  h_sp(inew,1) = buf[m++];
+  h_sp(inew,2) = buf[m++];
+  h_sp(inew,3) = buf[m++];
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->
+        unpack_exchange(inew,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::size_restart()
+{
+  // 15 values per owned atom, matching pack_restart():
+  // length slot + x (3) + tag, type, mask, image + v (3) + sp (4)
+  const int nlocal = atom->nlocal;
+  int total = 15 * nlocal;
+
+  // plus whatever each fix in atom->extra_restart reports per atom
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_restart(int i, double *buf)
+{
+  atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+            MASK_MASK | IMAGE_MASK | SP_MASK);
+  // slot 0 is filled last with the record length; data starts at index 1
+  int m = 1;
+  buf[m++] = h_x(i,0);             // position first
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = ubuf(h_tag(i)).d;     // integer fields go through ubuf
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = h_v(i,0);             // velocity
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+  // spin, four components
+  buf[m++] = h_sp(i,0);
+  buf[m++] = h_sp(i,1);
+  buf[m++] = h_sp(i,2);
+  buf[m++] = h_sp(i,3);
+  // fixes listed in atom->extra_restart append their data for atom i
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::unpack_restart(double *buf)
+{
+  const int inew = atom->nlocal;  // the restored atom is stored at index nlocal
+  if (inew == nmax) {
+    grow(0);
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | SP_MASK);
+  // same field order as pack_restart(); buf[0] is the record length
+  int m = 1;
+  h_x(inew,0) = buf[m++];
+  h_x(inew,1) = buf[m++];
+  h_x(inew,2) = buf[m++];
+  h_tag(inew) = (tagint) ubuf(buf[m++]).i;
+  h_type(inew) = (int) ubuf(buf[m++]).i;
+  h_mask(inew) = (int) ubuf(buf[m++]).i;
+  h_image(inew) = (imageint) ubuf(buf[m++]).i;
+  h_v(inew,0) = buf[m++];
+  h_v(inew,1) = buf[m++];
+  h_v(inew,2) = buf[m++];
+
+  h_sp(inew,0) = buf[m++];
+  h_sp(inew,1) = buf[m++];
+  h_sp(inew,2) = buf[m++];
+  h_sp(inew,3) = buf[m++];
+  // whatever remains of the record is stashed unparsed in atom->extra
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    const int nleft = static_cast<int> (buf[0]) - m;
+    for (int k = 0; k < nleft; k++) extra[inew][k] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::create_atom(int itype, double *coord)
+{
+  const int inew = atom->nlocal;  // the new atom is stored at index nlocal
+  if (inew == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->sync(Host,ALL_MASK);
+  atomKK->modified(Host,ALL_MASK);
+  // defaults: tag 0, mask 1, zero velocity, zero spin
+  tag[inew] = 0;
+  type[inew] = itype;
+  h_x(inew,0) = coord[0];
+  h_x(inew,1) = coord[1];
+  h_x(inew,2) = coord[2];
+  h_mask[inew] = 1;
+  h_image[inew] = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+  h_v(inew,0) = 0.0;
+  h_v(inew,1) = 0.0;
+  h_v(inew,2) = 0.0;
+
+  h_sp(inew,0) = 0.0;
+  h_sp(inew,1) = 0.0;
+  h_sp(inew,2) = 0.0;
+  h_sp(inew,3) = 0.0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::data_atom(double *coord, imageint imagetmp,
+                                    char **values)
+{
+  const int inew = atom->nlocal;  // the parsed atom is stored at index nlocal
+  if (inew == nmax) grow(0);
+  // values[0] = atom id, values[1] = atom type (must lie in 1..ntypes)
+  h_tag[inew] = utils::inumeric(FLERR,values[0],true,lmp);
+  h_type[inew] = utils::inumeric(FLERR,values[1],true,lmp);
+  if (type[inew] <= 0 || type[inew] > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+  // values[2] -> sp(.,3); values[6..8] -> sp(.,0..2), rescaled to unit length below
+  h_sp(inew,3) = utils::numeric(FLERR,values[2],true,lmp);
+  h_sp(inew,0) = utils::numeric(FLERR,values[6],true,lmp);
+  h_sp(inew,1) = utils::numeric(FLERR,values[7],true,lmp);
+  h_sp(inew,2) = utils::numeric(FLERR,values[8],true,lmp);
+  const double inorm = 1.0/sqrt(sp[inew][0]*sp[inew][0] +
+                                sp[inew][1]*sp[inew][1] +
+                                sp[inew][2]*sp[inew][2]);
+  h_sp(inew,0) *= inorm;
+  h_sp(inew,1) *= inorm;
+  h_sp(inew,2) *= inorm;
+
+  h_x(inew,0) = coord[0];
+  h_x(inew,1) = coord[1];
+  h_x(inew,2) = coord[2];
+
+  h_image[inew] = imagetmp;
+
+  h_mask[inew] = 1;
+  h_v(inew,0) = 0.0;
+  h_v(inew,1) = 0.0;
+  h_v(inew,2) = 0.0;
+
+  atomKK->modified(Host,ALL_MASK);
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  h_sp(nlocal,3) = utils::numeric(FLERR,values[0],true,lmp);
+  h_sp(nlocal,0) = utils::numeric(FLERR,values[1],true,lmp);
+  h_sp(nlocal,1) = utils::numeric(FLERR,values[2],true,lmp);
+  h_sp(nlocal,2) = utils::numeric(FLERR,values[3],true,lmp);
+  // rescale components 0..2 to unit length through the raw sp array
+  double *s = sp[nlocal];
+  const double inorm = 1.0/sqrt(s[0]*s[0] + s[1]*s[1] + s[2]*s[2]);
+  s[0] *= inorm;
+  s[1] *= inorm;
+  s[2] *= inorm;
+
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::pack_data(double **buf)
+{
+  // NOTE(review): fills 12 columns per row; assumes rows hold 9 data values + 3 image flags - confirm
+  for (int i = 0; i < atom->nlocal; i++) {
+    buf[i][0] = h_tag[i];
+    buf[i][1] = h_type[i];
+    buf[i][2] = h_sp(i,3);   // column order mirrors data_atom(): values[2] -> sp(.,3)
+    buf[i][3] = h_x(i,0);
+    buf[i][4] = h_x(i,1);
+    buf[i][5] = h_x(i,2);
+    buf[i][6] = h_sp(i,0);   // values[6..8] -> sp(.,0..2)
+    buf[i][7] = h_sp(i,1);
+    buf[i][8] = h_sp(i,2);
+    buf[i][9] = (h_image[i] & IMGMASK) - IMGMAX;
+    buf[i][10] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+    buf[i][11] = (h_image[i] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::pack_data_hybrid(int i, double *buf)
+{
+  buf[0] = h_sp(i,3);
+  buf[1] = h_sp(i,0);
+  buf[2] = h_sp(i,1);
+  buf[3] = h_sp(i,2);
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file: id type sp x y z spx spy spz + 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (int) buf[i][0],(int) buf[i][1],buf[i][2],buf[i][3],buf[i][4],buf[i][5],
+            buf[i][6],buf[i][7],buf[i][8],(int) buf[i][9],(int) buf[i][10],(int) buf[i][11]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecSpinKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," %-1.16e %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2],buf[3]);  // no newline is emitted here
+  return 4;  // number of buf values written
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecSpinKokkos::memory_usage()
+{
+  bigint total = 0;
+  // each array is counted only when atom->memcheck() accepts its name
+  if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) total += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) total += memory->usage(image,nmax);
+  if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) total += memory->usage(f,nmax*commKK->nthreads,3);
+  // spin-specific arrays
+  if (atom->memcheck("sp")) total += memory->usage(sp,nmax,4);
+  if (atom->memcheck("fm")) total += memory->usage(fm,nmax*comm->nthreads,3);
+  if (atom->memcheck("fm_long")) total += memory->usage(fm_long,nmax*comm->nthreads,3);
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & SP_MASK) atomKK->k_sp.sync<LMPDeviceType>();
+    if (mask & FM_MASK) atomKK->k_fm.sync<LMPDeviceType>();
+    if (mask & FML_MASK) atomKK->k_fm_long.sync<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & SP_MASK) atomKK->k_sp.sync<LMPHostType>();
+    if (mask & FM_MASK) atomKK->k_fm.sync<LMPHostType>();
+    if (mask & FML_MASK) atomKK->k_fm_long.sync<LMPHostType>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & SP_MASK) atomKK->k_sp.modify<LMPDeviceType>();
+    if (mask & FM_MASK) atomKK->k_fm.modify<LMPDeviceType>();
+    if (mask & FML_MASK) atomKK->k_fm_long.modify<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & SP_MASK) atomKK->k_sp.modify<LMPHostType>();
+    if (mask & FM_MASK) atomKK->k_fm.modify<LMPHostType>();
+    if (mask & FML_MASK) atomKK->k_fm_long.modify<LMPHostType>();
+  }
+}
+
+void AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & SP_MASK) && atomKK->k_sp.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_sp_array>(atomKK->k_sp,space);
+    if ((mask & FM_MASK) && atomKK->k_fm.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_fm_array>(atomKK->k_fm,space);
+    if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_fm_long_array>(atomKK->k_fm_long,space);
+  } else {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & SP_MASK) && atomKK->k_sp.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_sp_array>(atomKK->k_sp,space);
+    if ((mask & FM_MASK) && atomKK->k_fm.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_fm_array>(atomKK->k_fm,space);
+    if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_fm_long_array>(atomKK->k_fm_long,space);
+  }
+}
+
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h
new file mode 100644
index 0000000000..5b57cfd8e6
--- /dev/null
+++ b/src/KOKKOS/atom_vec_spin_kokkos.h
@@ -0,0 +1,132 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(spin/kk,AtomVecSpinKokkos)
+AtomStyle(spin/kk/device,AtomVecSpinKokkos)
+AtomStyle(spin/kk/host,AtomVecSpinKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_SPIN_KOKKOS_H
+#define LMP_ATOM_VEC_SPIN_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecSpinKokkos : public AtomVecKokkos {
+ public:
+  AtomVecSpinKokkos(class LAMMPS *);
+  void grow(int);
+  void copy(int, int, int);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, imageint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+  
+  // clear magnetic and mechanic forces
+
+  void force_clear(int, size_t);
+
+  void grow_reset();
+  // input lists to be checked
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;           // lattice quantities
+
+                                // spin quantities
+  double **sp;                  // sp[i][0-2] direction of the spin i
+                                // sp[i][3] atomic magnetic moment of the spin i
+  double **fm;                  // fm[i][0-2] direction of magnetic precession
+  double **fm_long;             // storage of long-range spin prec. components
+
+  DAT::t_tagint_1d d_tag;
+  HAT::t_tagint_1d h_tag;
+
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+
+  DAT::t_x_array d_sp;
+  DAT::t_x_array d_fm;
+  DAT::t_x_array d_fm_long;
+
+  HAT::t_x_array h_sp;
+  HAT::t_x_array h_fm;
+  HAT::t_x_array h_fm_long;
+
+  DAT::tdual_int_1d k_count;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Per-processor system is too big
+
+The number of owned atoms plus ghost atoms on a single
+processor must fit in 32-bit integer.
+
+E: Invalid atom type in Atoms section of data file
+
+Atom types must range from 1 to specified # of types.
+
+*/

From 735676241ff8b56bf952e67d2e9f410a674251b0 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Tue, 29 Sep 2020 08:06:41 -0600
Subject: [PATCH 08/44] start correcting atom spin/kk

---
 src/KOKKOS/Install.sh               |  2 ++
 src/KOKKOS/atom_kokkos.cpp          |  6 ++++++
 src/KOKKOS/atom_kokkos.h            |  5 +++++
 src/KOKKOS/atom_vec_spin_kokkos.cpp | 21 +++++++++++----------
 src/atom_masks.h                    |  6 ++++++
 5 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 540389f599..87cddbe1de 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -63,6 +63,8 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp
 action atom_vec_bond_kokkos.h atom_vec_bond.h
 action atom_vec_charge_kokkos.cpp
 action atom_vec_charge_kokkos.h
+action atom_vec_spin_kokkos.cpp
+action atom_vec_spin_kokkos.h
 action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp
 action atom_vec_dpd_kokkos.h atom_vec_dpd.h
 action atom_vec_full_kokkos.cpp atom_vec_full.cpp
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index 4637a9a21c..2640c1611d 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -76,6 +76,12 @@ AtomKokkos::~AtomKokkos()
   memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3);
   memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4);
 
+  // SPIN package
+
+  memoryKK->destroy_kokkos(k_sp, sp);
+  memoryKK->destroy_kokkos(k_fm, fm);
+  memoryKK->destroy_kokkos(k_fm_long, fm_long);
+
   // USER-DPD package
   memoryKK->destroy_kokkos(k_uCond,uCond);
   memoryKK->destroy_kokkos(k_uMech,uMech);
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index 0ae032032a..3ed703c66a 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -54,6 +54,11 @@ class AtomKokkos : public Atom {
 
   DAT::tdual_float_2d k_dvector;
 
+  // SPIN package
+
+  DAT::tdual_x_array k_sp;
+  DAT::tdual_x_array k_fm;
+  DAT::tdual_x_array k_fm_long;
 
 // USER-DPD package
   DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew,
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
index 8a7dd3317c..ef0b350092 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp
@@ -24,19 +24,20 @@
 ------------------------------------------------------------------------- */
 
 #include "atom_vec_spin_kokkos.h"
-#include <cmath>
-#include <cstring>
 #include "atom_kokkos.h"
 #include "comm_kokkos.h"
 #include "domain.h"
-#include "error.h"
-#include "fix.h"
-#include "memory_kokkos.h"
 #include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory_kokkos.h"
+#include "error.h"
 #include "utils.h"
 
 using namespace LAMMPS_NS;
 
+#define DELTA 10
+
 /* ---------------------------------------------------------------------- */
 
 AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
@@ -159,10 +160,10 @@ void AtomVecSpinKokkos::copy(int i, int j, int delflag)
   h_v(j,1) = h_v(i,1);
   h_v(j,2) = h_v(i,2);
 
-  h_sp(j,0) = h_sp(i,0)
-  h_sp(j,1) = h_sp(i,1)
-  h_sp(j,2) = h_sp(i,2)
-  h_sp(j,3) = h_sp(i,3)
+  h_sp(j,0) = h_sp(i,0); 
+  h_sp(j,1) = h_sp(i,1);
+  h_sp(j,2) = h_sp(i,2);
+  h_sp(j,3) = h_sp(i,3);
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@@ -263,7 +264,7 @@ struct AtomVecSpinKokkos_PackBorder {
       const typename ArrayTypes<DeviceType>::t_sp_array &sp,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
   _buf(buf),_list(list),_iswap(iswap),
-    _x(x),_sp(sp),_tag(tag),_type(type),_mask(mask),
+    _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),
     _dx(dx),_dy(dy),_dz(dz) {}
   
   KOKKOS_INLINE_FUNCTION
diff --git a/src/atom_masks.h b/src/atom_masks.h
index 8e29448488..daad323835 100644
--- a/src/atom_masks.h
+++ b/src/atom_masks.h
@@ -42,6 +42,12 @@
 #define ENERGY_MASK    0x00010000
 #define VIRIAL_MASK    0x00020000
 
+// SPIN
+
+#define SP_MASK         0x00000001
+#define FM_MASK         0x00000002
+#define FML_MASK        0x00000004
+
 // DPD
 
 #define DPDRHO_MASK       0x00040000

From d3aa2d1cd01c6f4fa86b3eb388130b1fe9214d26 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Wed, 30 Sep 2020 10:27:22 -0600
Subject: [PATCH 09/44] compilable kokkos files (still a segfault issue)

---
 src/KOKKOS/atom_kokkos.h            |  6 +++---
 src/KOKKOS/atom_vec_spin_kokkos.cpp | 22 ++++++++++++++++------
 src/KOKKOS/atom_vec_spin_kokkos.h   | 12 ++++++------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index 3ed703c66a..b66d54cbdd 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -56,9 +56,9 @@ class AtomKokkos : public Atom {
 
   // SPIN package
 
-  DAT::tdual_x_array k_sp;
-  DAT::tdual_x_array k_fm;
-  DAT::tdual_x_array k_fm_long;
+  DAT::tdual_sp_array k_sp;
+  DAT::tdual_fm_array k_fm;
+  DAT::tdual_fm_long_array k_fm_long;
 
 // USER-DPD package
   DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew,
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
index ef0b350092..6ed62c0242 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp
@@ -135,10 +135,10 @@ void AtomVecSpinKokkos::grow_reset()
   sp = atomKK->sp; 
   d_sp = atomKK->k_sp.d_view;
   h_sp = atomKK->k_sp.h_view;
-  fm = atom->fm; 
+  fm = atomKK->fm; 
   d_fm = atomKK->k_fm.d_view;
   h_fm = atomKK->k_fm.h_view;
-  fm_long = atom->fm_long;
+  fm_long = atomKK->fm_long;
   d_fm_long = atomKK->k_fm_long.d_view;
   h_fm_long = atomKK->k_fm_long.h_view;
 }
@@ -537,10 +537,10 @@ struct AtomVecSpinKokkos_UnpackBorder {
       _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
       _type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
       _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
-      _sp(i+_first) = _buf(i,6);
-      _sp(i+_first) = _buf(i,7);
-      _sp(i+_first) = _buf(i,8);
-      _sp(i+_first) = _buf(i,9);
+      _sp(i+_first,0) = _buf(i,6);
+      _sp(i+_first,1) = _buf(i,7);
+      _sp(i+_first,2) = _buf(i,8);
+      _sp(i+_first,3) = _buf(i,9);
   }
 };
 
@@ -1296,3 +1296,13 @@ void AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned i
   }
 }
 
+/* ----------------------------------------------------------------------
+   clear all forces (mech and mag)
+------------------------------------------------------------------------- */
+
+void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes)
+{
+  memset(&atom->f[0][0],0,3*nbytes);
+  memset(&atom->fm[0][0],0,3*nbytes);
+  memset(&atom->fm_long[0][0],0,3*nbytes);
+}
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h
index 5b57cfd8e6..d439424076 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.h
+++ b/src/KOKKOS/atom_vec_spin_kokkos.h
@@ -102,13 +102,13 @@ class AtomVecSpinKokkos : public AtomVecKokkos {
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
 
-  DAT::t_x_array d_sp;
-  DAT::t_x_array d_fm;
-  DAT::t_x_array d_fm_long;
+  DAT::t_sp_array d_sp;
+  DAT::t_fm_array d_fm;
+  DAT::t_fm_long_array d_fm_long;
 
-  HAT::t_x_array h_sp;
-  HAT::t_x_array h_fm;
-  HAT::t_x_array h_fm_long;
+  HAT::t_sp_array h_sp;
+  HAT::t_fm_array h_fm;
+  HAT::t_fm_long_array h_fm_long;
 
   DAT::tdual_int_1d k_count;
 };

From a8d304405ddca36740deef2e8608d8b4c782f88a Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Wed, 30 Sep 2020 15:55:18 -0600
Subject: [PATCH 10/44] before pull from other machine

---
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index cf351e6539..3fffb8b58e 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -416,11 +416,9 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
  
   Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja);
   Jex_mech *= 8.0*Jex*rjr*exp(-rja);
-  // Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
   
   Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka);
   Kex_mech *= 8.0*Kex*rkr*exp(-rka);
-  // Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 

From 84c104641b4d510cfba8535085f9f17befe22926 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Fri, 2 Oct 2020 10:47:29 -0600
Subject: [PATCH 11/44] adding offset option and doc

---
 doc/src/pair_spin_exchange.rst              | 159 ++++++++++++++++----
 src/SPIN/pair_spin_exchange.cpp             | 114 ++++++++++----
 src/SPIN/pair_spin_exchange.h               |   6 +-
 src/SPIN/pair_spin_exchange_biquadratic.cpp |  71 +++++++--
 src/SPIN/pair_spin_exchange_biquadratic.h   |   4 +-
 src/SPIN/pair_spin_neel.cpp                 |   2 +-
 6 files changed, 279 insertions(+), 77 deletions(-)

diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst
index 14eefaccec..32a722c5f0 100644
--- a/doc/src/pair_spin_exchange.rst
+++ b/doc/src/pair_spin_exchange.rst
@@ -3,12 +3,16 @@
 pair_style spin/exchange command
 ================================
 
+pair_style spin/exchange/biquadratic command
+============================================
+
 Syntax
 """"""
 
 .. code-block:: LAMMPS
 
    pair_style spin/exchange cutoff
+   pair_style spin/exchange/biquadratic cutoff
 
 * cutoff = global cutoff pair (distance in metal units)
 
@@ -19,7 +23,10 @@ Examples
 
    pair_style spin/exchange 4.0
    pair_coeff * * exchange 4.0 0.0446928 0.003496 1.4885
-   pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965
+   pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965 offset yes
+   pair_style spin/exchange/biquadratic 4.0
+   pair_coeff * * biquadratic 4.0 0.05 0.03 1.48 0.05 0.03 1.48 offset no
+   pair_coeff 1 2 biquadratic 6.0 -0.01 0.0 1.9 0.0 0.1 19
 
 Description
 """""""""""
@@ -31,69 +38,163 @@ pairs of magnetic spins:
 
    H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,\vec{s}_i \cdot \vec{s}_j
 
-where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two neighboring magnetic spins of two particles,
-:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance between the two
-particles. The summation is over pairs of nearest neighbors.
-:math:`J(r_{ij})` is a function defining the intensity and the sign of the exchange
-interaction for different neighboring shells. This function is defined as:
+where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two unit vectors representing
+the magnetic spins of two particles (usually atoms), and 
+:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance 
+between those two particles. The summation is over pairs of nearest neighbors. 
+:math:`J(r_{ij})` is a function defining the intensity and the sign of the 
+exchange interaction for different neighboring shells. 
+
+Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction 
+between pairs of magnetic spins:
+
+.. math::
+  
+   H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\,
+                      \vec{s}_{i}\cdot \vec{s}_{j} 
+                      -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\,
+                      \left(\vec{s}_{i}\cdot 
+                      \vec{s}_{j}\right)^2
+
+where :math:`\vec{s}_i`,  :math:`\vec{s}_j`,  :math:`r_{ij}` and 
+:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is 
+a second function, defining the intensity and the sign of the biquadratic term.
+
+The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both 
+interactions above is defined by the following function:
 
 .. math::
 
-    {J}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d}  \right)^2 \left( 1 - b \left( \frac{r_{ij}}{d}  \right)^2 \right) e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij})
+    {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d}  \right)^2 
+    \left( 1 - b \left( \frac{r_{ij}}{d}  \right)^2 \right) 
+    e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij})
 
-where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients defined in the associated
-"pair_coeff" command, and :math:`R_c` is the radius cutoff associated to
-the pair interaction (see below for more explanations).
+where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients 
+defined in the associated "pair_coeff" command, and :math:`R_c` is the radius 
+cutoff associated to the pair interaction (see below for more explanations).
 
-The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that the function above matches with
-the value of the exchange interaction for the :math:`N` neighbor shells taken into account.
-Examples and more explanations about this function and its parameterization are reported
-in :ref:`(Tranchida) <Tranchida3>`.
+The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that 
+the function above matches with the value of the exchange interaction for the 
+:math:`N` neighbor shells taken into account.
+Examples and more explanations about this function and its parameterization 
+are reported in :ref:`(Tranchida) <Tranchida3>`.
+
+When a *spin/exchange/biquadratic* pair style is defined, six coefficients 
+(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be 
+fitted.
 
 From this exchange interaction, each spin :math:`i` will be submitted
-to a magnetic torque :math:`\vec{\omega}`, and its associated atom can be submitted to a
-force :math:`\vec{F}` for spin-lattice calculations (see :doc:`fix nve/spin <fix_nve_spin>`),
-such as:
+to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be 
+submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see 
+:doc:`fix nve/spin <fix_nve_spin>`), such as:
 
 .. math::
 
    \vec{\omega}_{i} = \frac{1}{\hbar} \sum_{j}^{Neighb} {J}
    \left(r_{ij} \right)\,\vec{s}_{j}
    ~~{\rm and}~~
-   \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij}
+   \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ 
+   \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij}
 
-with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij} = \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit
+with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij}
+= \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit
 vector between sites :math:`i` and :math:`j`.
+Equivalent forces and magnetic torques are generated for the biquadratic term 
+when a *spin/exchange/biquadratic* pair style is defined.
 
 More details about the derivation of these torques/forces are reported in
 :ref:`(Tranchida) <Tranchida3>`.
 
-For the *spin/exchange* pair style, the following coefficients must be defined
-for each pair of atoms types via the :doc:`pair_coeff <pair_coeff>` command as in
-the examples above, or in the data file or restart files read by the
-:doc:`read_data <read_data>` or :doc:`read_restart <read_restart>` commands, and
-set in the following order:
+For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the 
+following coefficients must be defined for each pair of atoms types via the 
+:doc:`pair_coeff <pair_coeff>` command as in the examples above, or in the data 
+file or restart files read by the :doc:`read_data <read_data>` or 
+:doc:`read_restart <read_restart>` commands, and set in the following order:
 
 * :math:`R_c` (distance units)
 * :math:`a`  (energy units)
 * :math:`b`  (adim parameter)
 * :math:`d`  (distance units)
 
-Note that :math:`R_c` is the radius cutoff of the considered exchange interaction,
-and :math:`a`, :math:`b` and :math:`d` are the three coefficients performing the parameterization
-of the function :math:`J(r_{ij})` defined above.
+for the *spin/exchange* pair style, and:
+
+* :math:`R_c` (distance units)
+* :math:`a_j`  (energy units)
+* :math:`b_j`  (adim parameter)
+* :math:`d_j`  (distance units)
+* :math:`a_k`  (energy units)
+* :math:`b_k`  (adim parameter)
+* :math:`d_k`  (distance units)
+
+for the *spin/exchange/biquadratic* pair style.
+
+Note that :math:`R_c` is the radius cutoff of the considered exchange 
+interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients 
+performing the parameterization of the function :math:`J(r_{ij})` defined 
+above (in the *biquadratic* case, :math:`a_j`, :math:`b_j`, :math:`d_j` and 
+:math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})`
+and :math:`K(r_{ij})` respectively).
+
 
 None of those coefficients is optional. If not specified, the
 *spin/exchange* pair style cannot be used.
 
 ----------
 
+**Offsetting magnetic forces and energies**\ :
+
+For spin-lattice simulation, it can be useful to offset the
+mechanical forces and energies generated by the exchange
+interaction.
+The *offset* keyword applies this offset.
+By setting *offset* to *yes*, the energy definitions above are
+replaced by:
+
+.. math::
+
+   H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,[ \vec{s}_i \cdot \vec{s}_j-1 ]
+
+for the *spin/exchange* pair style, and:  
+
+.. math::
+  
+   H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\,
+                      [ \vec{s}_{i}\cdot \vec{s}_{j} -1 ]
+                      -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\,
+                      [ \left(\vec{s}_{i}\cdot 
+                      \vec{s}_{j}\right)^2 -1]
+
+for the *spin/exchange/biquadratic* pair style.
+
+Note that this offset only affects the calculation of the energy
+and mechanical forces. It does not modify the calculation of the
+precession vectors (and thus does not impact the purely magnetic
+properties).
+This ensures that when all spins are aligned, the magnetic energy
+and the associated mechanical forces (and thus the pressure
+generated by the magnetic potential) are null. 
+
+.. note::
+  This offset term can be very important when calculations such as
+  equations of state (energy vs volume, or energy vs pressure) are
+  being performed. Indeed, setting the *offset* term ensures that
+  at the ground state of the crystal and at the equilibrium magnetic
+  configuration (typically ferromagnetic), the pressure is null,
+  as expected.
+  Otherwise, magnetic forces could generate a residual pressure.
+
+When the *offset* option is set to *no*, no offset is applied
+(also corresponding to the default option).
+
+----------
+
 Restrictions
 """"""""""""
 
 All the *pair/spin* styles are part of the SPIN package.  These styles
 are only enabled if LAMMPS was built with this package, and if the
-atom_style "spin" was declared.  See the :doc:`Build package <Build_package>` doc page for more info.
+atom_style "spin" was declared.  
+See the :doc:`Build package <Build_package>` doc page for more info.
 
 Related commands
 """"""""""""""""
@@ -103,7 +204,7 @@ Related commands
 
 **Default:**
 
-none
+The default *offset* keyword value is *no*.
 
 ----------
 
diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp
index 611230c73e..5792738fd1 100644
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@@ -40,6 +40,14 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
+PairSpinExchange::PairSpinExchange(LAMMPS *lmp) : 
+  PairSpin(lmp) 
+{
+  e_offset = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
 PairSpinExchange::~PairSpinExchange()
 {
   if (allocated) {
@@ -61,6 +69,8 @@ PairSpinExchange::~PairSpinExchange()
 void PairSpinExchange::settings(int narg, char **arg)
 {
   PairSpin::settings(narg,arg);
+  
+  if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_spin_exchange_global = force->numeric(FLERR,arg[0]);
 
@@ -87,9 +97,9 @@ void PairSpinExchange::coeff(int narg, char **arg)
   // check if args correct
 
   if (strcmp(arg[2],"exchange") != 0)
-    error->all(FLERR,"Incorrect args in pair_style command");
-  if (narg != 7)
-    error->all(FLERR,"Incorrect args in pair_style command");
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if ((narg != 7) && (narg != 9))
+    error->all(FLERR,"Incorrect args for pair coefficients");
 
   int ilo,ihi,jlo,jhi;
   force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
@@ -97,11 +107,25 @@ void PairSpinExchange::coeff(int narg, char **arg)
 
   // get exchange arguments from input command
 
+  int iarg = 7;
   const double rc = force->numeric(FLERR,arg[3]);
   const double j1 = force->numeric(FLERR,arg[4]);
   const double j2 = force->numeric(FLERR,arg[5]);
   const double j3 = force->numeric(FLERR,arg[6]);
 
+  // read the optional "offset yes|no" keyword pair that may follow the 7 mandatory args
+
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"offset") == 0) {  // index with the cursor, not a fixed slot
+      if (strcmp(arg[iarg+1],"yes") == 0) {  // iarg+1 < narg since narg is 7 or 9 here
+        e_offset = 1;
+      } else if (strcmp(arg[iarg+1],"no") == 0) {
+        e_offset = 0;
+      } else error->all(FLERR,"Incorrect args for pair coefficients");
+      iarg += 2;
+    } else error->all(FLERR,"Incorrect args for pair coefficients");
+  }
+  
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
@@ -236,8 +260,7 @@ void PairSpinExchange::compute(int eflag, int vflag)
           compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
         
         if (eflag) {
-          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-          evdwl *= 0.5*hbar;
+          evdwl -= compute_energy(i,j,rsq,spi,spj);
           emag[i] += evdwl;
         } else evdwl = 0.0;
 
@@ -373,7 +396,9 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
 {
   int *type = atom->type;
   int itype, jtype;
-  double Jex, Jex_mech, ra, rr, iJ3;
+  double Jex, Jex_mech, ra, sdots;
+  double rr, iJ3;
+  double fx, fy, fz;
   itype = type[i];
   jtype = type[j];
 
@@ -385,38 +410,62 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
 
   Jex_mech = 1.0-ra-J2[itype][jtype]*ra*(2.0-ra);
   Jex_mech *= 8.0*Jex*rr*exp(-ra);
-  Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
+  
+  // force prefactor is Jex_mech*(si.sj - 1) with the offset, Jex_mech*(si.sj) without
+
+  fx = fy = fz = 0.0;
+  if (e_offset == 1) { // set offset
+    fx = Jex_mech*(sdots-1.0)*eij[0];
+    fy = Jex_mech*(sdots-1.0)*eij[1];
+    fz = Jex_mech*(sdots-1.0)*eij[2];
+  } else if (e_offset == 0) { // no offset ("normal" calculation)
+    fx = Jex_mech*sdots*eij[0];
+    fy = Jex_mech*sdots*eij[1];
+    fz = Jex_mech*sdots*eij[2];
+  } else error->all(FLERR,"Illegal option in pair exchange command");
+
+  fi[0] -= 0.5*fx;
+  fi[1] -= 0.5*fy;
+  fi[2] -= 0.5*fz;
+  // fi[0] -= fx;
+  // fi[1] -= fy;
+  // fi[2] -= fz;
 
-  fi[0] -= 0.5*Jex_mech*eij[0];
-  fi[1] -= 0.5*Jex_mech*eij[1];
-  fi[2] -= 0.5*Jex_mech*eij[2];
-  // fi[0] -= Jex_mech*eij[0];
-  // fi[1] -= Jex_mech*eij[1];
-  // fi[2] -= Jex_mech*eij[2];
 }
 
 /* ----------------------------------------------------------------------
    compute energy of spin pair i and j
 ------------------------------------------------------------------------- */
 
-// double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], double spj[3])
-// {
-//   int *type = atom->type;
-//   int itype, jtype;
-//   double Jex, ra;
-//   double energy = 0.0;
-//   itype = type[i];
-//   jtype = type[j];
-//
-//   Jex = J1_mech[itype][jtype];
-//   ra = rsq/J3[itype][jtype]/J3[itype][jtype];
-//   Jex = 4.0*Jex*ra;
-//   Jex *= (1.0-J2[itype][jtype]*ra);
-//   Jex *= exp(-ra);
-//
-//   energy = Jex*(spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
-//   return energy;
-// }
+double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], double spj[3])
+{
+  int *type = atom->type;  // per-atom type array, indexed below by i and j
+  int itype, jtype;
+  double Jex, ra, sdots;
+  double energy = 0.0;
+  itype = type[i];
+  jtype = type[j];
+  // radial part: Jex = 4*J1_mech*ra*(1 - J2*ra)*exp(-ra), with ra = rsq/J3^2
+  Jex = J1_mech[itype][jtype];
+  ra = rsq/J3[itype][jtype]/J3[itype][jtype];
+  Jex = 4.0*Jex*ra;
+  Jex *= (1.0-J2[itype][jtype]*ra);
+  Jex *= exp(-ra);
+
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  // dot product of the two spin vectors
+
+  // returns 0.5*Jex*(si.sj - 1) with the offset, 0.5*Jex*(si.sj) without
+
+  if (e_offset == 1) { // set offset
+    energy = 0.5*Jex*(sdots-1.0);
+  } else if (e_offset == 0) { // no offset ("normal" calculation)
+    energy = 0.5*Jex*sdots;
+  } else error->all(FLERR,"Illegal option in pair exchange command");
+
+  return energy;
+}
 
 /* ----------------------------------------------------------------------
    allocate all arrays
@@ -505,6 +554,7 @@ void PairSpinExchange::read_restart(FILE *fp)
 void PairSpinExchange::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_spin_exchange_global,sizeof(double),1,fp);
+  fwrite(&e_offset,sizeof(int),1,fp);  // written second; read_restart_settings reads it back in the same position
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
@@ -517,10 +567,12 @@ void PairSpinExchange::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error);
+    utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error);
     utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error);
     utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error);
   }
   MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&e_offset,1,MPI_INT,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
diff --git a/src/SPIN/pair_spin_exchange.h b/src/SPIN/pair_spin_exchange.h
index 4e9e6bfac8..2a31f9516e 100644
--- a/src/SPIN/pair_spin_exchange.h
+++ b/src/SPIN/pair_spin_exchange.h
@@ -26,7 +26,7 @@ namespace LAMMPS_NS {
 
 class PairSpinExchange : public PairSpin {
  public:
-  PairSpinExchange(LAMMPS *lmp) : PairSpin(lmp) {}
+  PairSpinExchange(class LAMMPS *);
   virtual ~PairSpinExchange();
   void settings(int, char **);
   void coeff(int, char **);
@@ -38,8 +38,7 @@ class PairSpinExchange : public PairSpin {
 
   void compute_exchange(int, int, double, double *, double *);
   void compute_exchange_mech(int, int, double, double *, double *, double *, double *);
-
-  // double compute_energy(int , int , double , double *, double *);
+  double compute_energy(int , int , double , double *, double *);
 
   void write_restart(FILE *);
   void read_restart(FILE *);
@@ -49,6 +48,7 @@ class PairSpinExchange : public PairSpin {
   double cut_spin_exchange_global;      // global exchange cutoff distance
 
  protected:
+  int e_offset;                         // apply energy offset
   double **J1_mag;                      // exchange coeffs in eV
   double **J1_mech;                     // mech exchange coeffs in
   double **J2, **J3;                    // J1 in eV, J2 adim, J3 in Ang
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 3fffb8b58e..4c6c3936cf 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -40,6 +40,14 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
+PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) : 
+  PairSpin(lmp) 
+{
+  e_offset = 0;  // offset is off by default; coeff() sets it to 1 on "offset yes"
+}
+
+/* ---------------------------------------------------------------------- */
+
 PairSpinExchangeBiquadratic::~PairSpinExchangeBiquadratic()
 {
   if (allocated) {
@@ -66,6 +74,8 @@ void PairSpinExchangeBiquadratic::settings(int narg, char **arg)
 {
   PairSpin::settings(narg,arg);
 
+  if (narg != 1) error->all(FLERR,"Illegal pair_style command");
+
   cut_spin_exchange_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
@@ -91,9 +101,9 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
   // check if args correct
 
   if (strcmp(arg[2],"biquadratic") != 0)
-    error->all(FLERR,"Incorrect args in pair_style command");
-  if (narg != 10)
-    error->all(FLERR,"Incorrect args in pair_style command");
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if ((narg != 10) && (narg != 12))
+    error->all(FLERR,"Incorrect args for pair coefficients");
 
   int ilo,ihi,jlo,jhi;
   force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
@@ -101,6 +111,7 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
 
   // get exchange arguments from input command
 
+  int iarg = 10;
   const double rc = force->numeric(FLERR,arg[3]);
   const double j1 = force->numeric(FLERR,arg[4]);
   const double j2 = force->numeric(FLERR,arg[5]);
@@ -109,6 +120,19 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
   const double k2 = force->numeric(FLERR,arg[8]);
   const double k3 = force->numeric(FLERR,arg[9]);
 
+  // read the optional "offset yes|no" keyword pair that may follow the 10 mandatory args
+
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"offset") == 0) {  // index with the cursor, not a fixed slot
+      if (strcmp(arg[iarg+1],"yes") == 0) {  // iarg+1 < narg since narg is 10 or 12 here
+        e_offset = 1;
+      } else if (strcmp(arg[iarg+1],"no") == 0) {
+        e_offset = 0;
+      } else error->all(FLERR,"Incorrect args for pair coefficients");
+      iarg += 2;
+    } else error->all(FLERR,"Incorrect args for pair coefficients");
+  }
+
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
@@ -399,8 +423,9 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
 {
   int *type = atom->type;
   int itype,jtype;
-  double Jex,Jex_mech,Kex,Kex_mech,ra,sdots;
+  double Jex,Jex_mech,Kex,Kex_mech,sdots;
   double rja,rka,rjr,rkr,iJ3,iK3;
+  double fx, fy, fz;
   itype = type[i];
   jtype = type[j];
 
@@ -422,12 +447,25 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
-  fi[0] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
-  fi[1] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
-  fi[2] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
-  // fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
-  // fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
-  // fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
+  // apply or not energy and force offset
+  
+  fx = fy = fz = 0.0;
+  if (e_offset == 1) { // set offset
+    fx = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
+    fy = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1];
+    fz = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2];
+  } else if (e_offset == 0) { // no offset ("normal" calculation)
+    fx =  (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0];
+    fy =  (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1];
+    fz =  (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2];
+  } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command");
+
+  fi[0] -= 0.5*fx;
+  fi[1] -= 0.5*fy;
+  fi[2] -= 0.5*fz;
+  // fi[0] -= fx;
+  // fi[1] -= fy;
+  // fi[2] -= fz;
 }
 
 /* ----------------------------------------------------------------------
@@ -463,8 +501,14 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
 
-  energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0));
-  // energy = 0.5*(Jex*(sdots) + Kex*(sdots*sdots-1.0));
+  // apply or not energy and force offset
+  
+  if (e_offset == 1) { // set offset
+    energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0));
+  } else if (e_offset == 0) { // no offset ("normal" calculation)
+    energy = 0.5*(Jex*sdots + Kex*sdots*sdots);
+  } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command");
+  
   return energy;
 }
 
@@ -571,6 +615,7 @@ void PairSpinExchangeBiquadratic::read_restart(FILE *fp)
 void PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_spin_exchange_global,sizeof(double),1,fp);
+  fwrite(&e_offset,sizeof(int),1,fp);  // written second; read_restart_settings reads it back in the same position
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
@@ -583,10 +628,12 @@ void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error);
+    utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error);
     utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error);
     utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error);
   }
   MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&e_offset,1,MPI_INT,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h
index 6fb9a7a94c..1074b50f7b 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.h
+++ b/src/SPIN/pair_spin_exchange_biquadratic.h
@@ -26,7 +26,7 @@ namespace LAMMPS_NS {
 
 class PairSpinExchangeBiquadratic : public PairSpin {
  public:
-  PairSpinExchangeBiquadratic(LAMMPS *lmp) : PairSpin(lmp) {}
+  PairSpinExchangeBiquadratic(class LAMMPS *);
   virtual ~PairSpinExchangeBiquadratic();
   void settings(int, char **);
   void coeff(int, char **);
@@ -48,6 +48,8 @@ class PairSpinExchangeBiquadratic : public PairSpin {
   double cut_spin_exchange_global;      // global exchange cutoff distance
 
  protected:
+  
+  int e_offset;                         // apply energy offset
   double **J1_mag;                      // H exchange coeffs in eV
   double **J1_mech;                     // mech exchange coeffs in
   double **J2, **J3;                    // J1 in eV, J2 in Ang-1, J3 in Ang
diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp
index fc7cb6ab9a..c2377e7aee 100644
--- a/src/SPIN/pair_spin_neel.cpp
+++ b/src/SPIN/pair_spin_neel.cpp
@@ -612,7 +612,7 @@ double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3]
   eij_sj_3 = eij_sj*eij_sj_2;
   epq2 = q2r*(eij_si*eij_sj_3+eij_sj*eij_si_3);
 
-  return (epd+epq1+epq2);
+  return 0.5*(epd+epq1+epq2);
 }
 
 /* ----------------------------------------------------------------------

From 1cb0b9dece6e7e07b6dc8f2ba6bbd790b1bbfe9c Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 17:11:53 -0600
Subject: [PATCH 12/44] - modified all pairs (if on rcut) - clean KOKKOS from
 atom spin/kk (other PR)

---
 src/KOKKOS/Install.sh                       |    2 -
 src/KOKKOS/atom_kokkos.cpp                  |    6 -
 src/KOKKOS/atom_kokkos.h                    |    5 -
 src/KOKKOS/atom_vec_spin_kokkos.cpp         | 1308 -------------------
 src/KOKKOS/atom_vec_spin_kokkos.h           |  132 --
 src/KOKKOS/kokkos_type.h                    |   60 -
 src/SPIN/pair_spin_dipole_cut.cpp           |   48 +-
 src/SPIN/pair_spin_dipole_long.cpp          |   46 +-
 src/SPIN/pair_spin_dmi.cpp                  |   49 +-
 src/SPIN/pair_spin_exchange_biquadratic.cpp |   33 +-
 src/SPIN/pair_spin_magelec.cpp              |   46 +-
 src/SPIN/pair_spin_neel.cpp                 |   44 +-
 12 files changed, 143 insertions(+), 1636 deletions(-)
 delete mode 100644 src/KOKKOS/atom_vec_spin_kokkos.cpp
 delete mode 100644 src/KOKKOS/atom_vec_spin_kokkos.h

diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 4c5c9d7e1d..03508578ae 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -63,8 +63,6 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp
 action atom_vec_bond_kokkos.h atom_vec_bond.h
 action atom_vec_charge_kokkos.cpp
 action atom_vec_charge_kokkos.h
-action atom_vec_spin_kokkos.cpp
-action atom_vec_spin_kokkos.h
 action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp
 action atom_vec_dpd_kokkos.h atom_vec_dpd.h
 action atom_vec_full_kokkos.cpp atom_vec_full.cpp
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index b85b063190..a587494d09 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -76,12 +76,6 @@ AtomKokkos::~AtomKokkos()
   memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3);
   memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4);
 
-  // SPIN package
-
-  memoryKK->destroy_kokkos(k_sp, sp);
-  memoryKK->destroy_kokkos(k_fm, fm);
-  memoryKK->destroy_kokkos(k_fm_long, fm_long);
-
   // USER-DPD package
   memoryKK->destroy_kokkos(k_uCond,uCond);
   memoryKK->destroy_kokkos(k_uMech,uMech);
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index e2c666fea5..6eebbad661 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -54,11 +54,6 @@ class AtomKokkos : public Atom {
 
   DAT::tdual_float_2d k_dvector;
 
-  // SPIN package
-
-  DAT::tdual_sp_array k_sp;
-  DAT::tdual_fm_array k_fm;
-  DAT::tdual_fm_long_array k_fm_long;
 
 // USER-DPD package
   DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew,
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
deleted file mode 100644
index 6ed62c0242..0000000000
--- a/src/KOKKOS/atom_vec_spin_kokkos.cpp
+++ /dev/null
@@ -1,1308 +0,0 @@
-/* ----------------------------------------------------------------------
-
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-
-------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------
-   Contributing authors: Julien Tranchida (SNL)
-                         Aidan Thompson (SNL)
-
-   Please cite the related publication:
-   Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018).
-   Massively parallel symplectic algorithm for coupled magnetic spin dynamics
-   and molecular dynamics. Journal of Computational Physics.
-------------------------------------------------------------------------- */
-
-#include "atom_vec_spin_kokkos.h"
-#include "atom_kokkos.h"
-#include "comm_kokkos.h"
-#include "domain.h"
-#include "modify.h"
-#include "fix.h"
-#include "atom_masks.h"
-#include "memory_kokkos.h"
-#include "error.h"
-#include "utils.h"
-
-using namespace LAMMPS_NS;
-
-#define DELTA 10
-
-/* ---------------------------------------------------------------------- */
-
-AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
-{
-  molecular = 0;
-  mass_type = 1;
-  forceclearflag = 1;
-
-  comm_x_only = comm_f_only = 0;
-  size_forward = 7;
-  size_reverse = 9;
-  size_border = 10;
-  size_velocity = 3;
-  size_data_atom = 9;
-  size_data_vel = 4;
-  xcol_data = 4;
-
-  atom->sp_flag = 1;
-  
-  k_count = DAT::tdual_int_1d("atom::k_count",1);
-  atomKK = (AtomKokkos *) atom;
-  commKK = (CommKokkos *) comm;
-}
-
-/* ----------------------------------------------------------------------
-   grow atom arrays
-   n = 0 grows arrays by a chunk
-   n > 0 allocates arrays to size n
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::grow(int n)
-{
-  int step = MAX(DELTA,nmax*0.01);
-  if (n == 0) nmax += step;
-  else nmax = n;
-  atomKK->nmax = nmax;
-  if (nmax < 0 || nmax > MAXSMALLINT)
-    error->one(FLERR,"Per-processor system is too big");
-
-  atomKK->sync(Device,ALL_MASK);
-  atomKK->modified(Device,ALL_MASK);
-
-  memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
-  memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
-  memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
-  memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
-   
-  // allocating mech. quantities
-
-  memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x");
-  memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v");
-  memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f");
-  
-  // allocating mag. quantities
-
-  memoryKK->grow_kokkos(atomKK->k_sp,atomKK->sp,nmax,"atom:sp");
-  memoryKK->grow_kokkos(atomKK->k_fm,atomKK->fm,nmax,"atom:fm");
-  memoryKK->grow_kokkos(atomKK->k_fm_long,atomKK->fm_long,nmax,"atom:fm_long");
-
-  if (atom->nextra_grow)
-    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
-      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
-}
-
-/* ----------------------------------------------------------------------
-   reset local array ptrs
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::grow_reset()
-{
-  tag = atomKK->tag;
-  d_tag = atomKK->k_tag.d_view;
-  h_tag = atomKK->k_tag.h_view;
-
-  type = atomKK->type;
-  d_type = atomKK->k_type.d_view;
-  h_type = atomKK->k_type.h_view;
-  mask = atomKK->mask;
-  d_mask = atomKK->k_mask.d_view;
-  h_mask = atomKK->k_mask.h_view;
-  image = atomKK->image;
-  d_image = atomKK->k_image.d_view;
-  h_image = atomKK->k_image.h_view;
-  
-  x = atomKK->x;
-  d_x = atomKK->k_x.d_view;
-  h_x = atomKK->k_x.h_view;
-  v = atomKK->v;
-  d_v = atomKK->k_v.d_view;
-  h_v = atomKK->k_v.h_view;
-  f = atomKK->f;
-  d_f = atomKK->k_f.d_view;
-  h_f = atomKK->k_f.h_view;
-  
-  sp = atomKK->sp; 
-  d_sp = atomKK->k_sp.d_view;
-  h_sp = atomKK->k_sp.h_view;
-  fm = atomKK->fm; 
-  d_fm = atomKK->k_fm.d_view;
-  h_fm = atomKK->k_fm.h_view;
-  fm_long = atomKK->fm_long;
-  d_fm_long = atomKK->k_fm_long.d_view;
-  h_fm_long = atomKK->k_fm_long.h_view;
-}
-
-/* ----------------------------------------------------------------------
-   copy atom I info to atom J
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::copy(int i, int j, int delflag)
-{
-  h_tag[j] = h_tag[i];
-  h_type[j] = h_type[i];
-  mask[j] = mask[i];
-  h_image[j] = h_image[i];
-  h_x(j,0) = h_x(i,0);
-  h_x(j,1) = h_x(i,1);
-  h_x(j,2) = h_x(i,2);
-  h_v(j,0) = h_v(i,0);
-  h_v(j,1) = h_v(i,1);
-  h_v(j,2) = h_v(i,2);
-
-  h_sp(j,0) = h_sp(i,0); 
-  h_sp(j,1) = h_sp(i,1);
-  h_sp(j,2) = h_sp(i,2);
-  h_sp(j,3) = h_sp(i,3);
-
-  if (atom->nextra_grow)
-    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
-      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecSpinKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_sp_array_randomread _sp;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-  
-  AtomVecSpinKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_sp_array &sp,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_sp(sp.view<DeviceType>()),
-      _list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
-        // const size_t elements = 3;
-        const size_t elements = 7;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-          _buf(i,3) = _sp(j,0);
-          _buf(i,4) = _sp(j,1);
-          _buf(i,5) = _sp(j,2);
-          _buf(i,6) = _sp(j,3);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-          _buf(i,3) = _sp(j,0);
-          _buf(i,4) = _sp(j,1);
-          _buf(i,5) = _sp(j,2);
-          _buf(i,6) = _sp(j,3);
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-          _buf(i,3) = _sp(j,0);
-          _buf(i,4) = _sp(j,1);
-          _buf(i,5) = _sp(j,2);
-          _buf(i,6) = _sp(j,3);
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG>
-struct AtomVecSpinKokkos_PackBorder {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
-  const typename ArrayTypes<DeviceType>::t_int_1d _type;
-  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
-  const typename ArrayTypes<DeviceType>::t_sp_array_randomread _sp;
-  X_FLOAT _dx,_dy,_dz;
-
-  AtomVecSpinKokkos_PackBorder(
-      const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-      const int & iswap,
-      const typename ArrayTypes<DeviceType>::t_x_array &x,
-      const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
-      const typename ArrayTypes<DeviceType>::t_int_1d &type,
-      const typename ArrayTypes<DeviceType>::t_int_1d &mask,
-      const typename ArrayTypes<DeviceType>::t_sp_array &sp,
-      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-  _buf(buf),_list(list),_iswap(iswap),
-    _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),
-    _dx(dx),_dy(dy),_dz(dz) {}
-  
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-          _buf(i,3) = d_ubuf(_tag(j)).d;
-          _buf(i,4) = d_ubuf(_type(j)).d;
-          _buf(i,5) = d_ubuf(_mask(j)).d;
-          _buf(i,6) = _sp(j,0);
-          _buf(i,7) = _sp(j,1);
-          _buf(i,8) = _sp(j,2);
-          _buf(i,9) = _sp(j,3);
-      } else {
-          _buf(i,0) = _x(j,0) + _dx;
-          _buf(i,1) = _x(j,1) + _dy;
-          _buf(i,2) = _x(j,2) + _dz;
-          _buf(i,3) = d_ubuf(_tag(j)).d;
-          _buf(i,4) = d_ubuf(_type(j)).d;
-          _buf(i,5) = d_ubuf(_mask(j)).d;
-          _buf(i,6) = _sp(j,0);
-          _buf(i,7) = _sp(j,1);
-          _buf(i,8) = _sp(j,2);
-          _buf(i,9) = _sp(j,3);
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
-                               int pbc_flag, int *pbc, ExecutionSpace space)
-{
-  X_FLOAT dx,dy,dz;
-
-  if (pbc_flag != 0) {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0];
-      dy = pbc[1];
-      dz = pbc[2];
-    }
-    if(space==Host) {
-      AtomVecSpinKokkos_PackBorder<LMPHostType,1> f(
-        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
-      Kokkos::parallel_for(n,f);
-    } else {
-      AtomVecSpinKokkos_PackBorder<LMPDeviceType,1> f(
-        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
-      Kokkos::parallel_for(n,f);
-    }
-
-  } else {
-    dx = dy = dz = 0;
-    if(space==Host) {
-      AtomVecSpinKokkos_PackBorder<LMPHostType,0> f(
-        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
-      Kokkos::parallel_for(n,f);
-    } else {
-      AtomVecSpinKokkos_PackBorder<LMPDeviceType,0> f(
-        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
-      Kokkos::parallel_for(n,f);
-    }
-  }
-  return n*size_border;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_border(int n, int *list, double *buf,
-                               int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = ubuf(h_tag(j)).d;
-      buf[m++] = ubuf(h_type(j)).d;
-      buf[m++] = ubuf(h_mask(j)).d;
-      buf[m++] = h_sp(j,0);
-      buf[m++] = h_sp(j,1);
-      buf[m++] = h_sp(j,2);
-      buf[m++] = h_sp(j,3);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0];
-      dy = pbc[1];
-      dz = pbc[2];
-    }
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0) + dx;
-      buf[m++] = h_x(j,1) + dy;
-      buf[m++] = h_x(j,2) + dz;
-      buf[m++] = ubuf(h_tag(j)).d;
-      buf[m++] = ubuf(h_type(j)).d;
-      buf[m++] = ubuf(h_mask(j)).d;
-      buf[m++] = h_sp(j,0);
-      buf[m++] = h_sp(j,1);
-      buf[m++] = h_sp(j,2);
-      buf[m++] = h_sp(j,3);
-    }
-  }
-  
-  if (atom->nextra_border)
-    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
-      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
-
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_border_vel(int n, int *list, double *buf,
-                                   int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz,dvx,dvy,dvz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = ubuf(h_tag(j)).d;
-      buf[m++] = ubuf(h_type(j)).d;
-      buf[m++] = ubuf(h_mask(j)).d;
-      buf[m++] = h_sp(j,0);
-      buf[m++] = h_sp(j,1);
-      buf[m++] = h_sp(j,2);
-      buf[m++] = h_sp(j,3);
-      buf[m++] = h_v(j,0);
-      buf[m++] = h_v(j,1);
-      buf[m++] = h_v(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0];
-      dy = pbc[1];
-      dz = pbc[2];
-    }
-    if (!deform_vremap) {
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = ubuf(h_tag(j)).d;
-        buf[m++] = ubuf(h_type(j)).d;
-        buf[m++] = ubuf(h_mask(j)).d;
-        buf[m++] = h_sp(j,0);
-        buf[m++] = h_sp(j,1);
-        buf[m++] = h_sp(j,2);
-        buf[m++] = h_sp(j,3);
-        buf[m++] = h_v(j,0);
-        buf[m++] = h_v(j,1);
-        buf[m++] = h_v(j,2);
-      }
-    } else {
-      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
-      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
-      dvz = pbc[2]*h_rate[2];
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = ubuf(h_tag(j)).d;
-        buf[m++] = ubuf(h_type(j)).d;
-        buf[m++] = ubuf(h_mask(j)).d;
-        buf[m++] = h_sp(j,0);
-        buf[m++] = h_sp(j,1);
-        buf[m++] = h_sp(j,2);
-        buf[m++] = h_sp(j,3);
-        if (mask[i] & deform_groupbit) {
-          buf[m++] = h_v(j,0) + dvx;
-          buf[m++] = h_v(j,1) + dvy;
-          buf[m++] = h_v(j,2) + dvz;
-        } else {
-          buf[m++] = h_v(j,0);
-          buf[m++] = h_v(j,1);
-          buf[m++] = h_v(j,2);
-        }
-      }
-    }
-  }
-  
-  if (atom->nextra_border)
-    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
-      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
-
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_border_hybrid(int n, int *list, double *buf)
-{
-  int i,j,m;
-
-  m = 0;
-  for (i = 0; i < n; i++) {
-    j = list[i];
-    buf[m++] = h_sp(j,0);
-    buf[m++] = h_sp(j,1);
-    buf[m++] = h_sp(j,2);
-    buf[m++] = h_sp(j,3);
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecSpinKokkos_UnpackBorder {
-  typedef DeviceType device_type;
-
-  const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
-  typename ArrayTypes<DeviceType>::t_int_1d _type;
-  typename ArrayTypes<DeviceType>::t_int_1d _mask;
-  typename ArrayTypes<DeviceType>::t_sp_array _sp;
-  int _first;
-
-
-  AtomVecSpinKokkos_UnpackBorder(
-      const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
-      typename ArrayTypes<DeviceType>::t_x_array &x,
-      typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
-      typename ArrayTypes<DeviceType>::t_int_1d &type,
-      typename ArrayTypes<DeviceType>::t_int_1d &mask,
-      typename ArrayTypes<DeviceType>::t_sp_array &sp,
-      const int& first):
-    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-      _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
-      _type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
-      _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
-      _sp(i+_first,0) = _buf(i,6);
-      _sp(i+_first,1) = _buf(i,7);
-      _sp(i+_first,2) = _buf(i,8);
-      _sp(i+_first,3) = _buf(i,9);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first,
-                     const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
-  if (first+n >= nmax) {
-    grow(first+n+100);
-  }
-  if(space==Host) {
-    struct AtomVecSpinKokkos_UnpackBorder<LMPHostType>
-      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_sp,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    struct AtomVecSpinKokkos_UnpackBorder<LMPDeviceType>
-      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_sp,first);
-    Kokkos::parallel_for(n,f);
-  }
-  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::unpack_border(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-
-  for (i = first; i < last; i++) {
-    if (i == nmax) {
-      grow(0);
-    }
-    atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
-    h_type(i) = (int) ubuf(buf[m++]).i;
-    h_mask(i) = (int) ubuf(buf[m++]).i;
-    h_sp(i,0) = buf[m++];
-    h_sp(i,1) = buf[m++];
-    h_sp(i,2) = buf[m++];
-    h_sp(i,3) = buf[m++];
-  }
-
-  if (atom->nextra_border)
-    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
-      m += modify->fix[atom->extra_border[iextra]]->
-        unpack_border(n,first,&buf[m]);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::unpack_border_vel(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    if (i == nmax) grow(0);
-    atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK);
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
-    h_type(i) = (int) ubuf(buf[m++]).i;
-    h_mask(i) = (int) ubuf(buf[m++]).i;
-    h_sp(i,0) = buf[m++];
-    h_sp(i,1) = buf[m++];
-    h_sp(i,2) = buf[m++];
-    h_sp(i,3) = buf[m++];
-    h_v(i,0) = buf[m++];
-    h_v(i,1) = buf[m++];
-    h_v(i,2) = buf[m++];
-  }
-
-  if (atom->nextra_border)
-    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
-      m += modify->fix[atom->extra_border[iextra]]->
-        unpack_border(n,first,&buf[m]);
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::unpack_border_hybrid(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++)
-    h_sp(i,0) = buf[m++];
-    h_sp(i,1) = buf[m++];
-    h_sp(i,2) = buf[m++];
-    h_sp(i,3) = buf[m++];
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecSpinKokkos_PackExchangeFunctor {
-  typedef DeviceType device_type;
-  typedef ArrayTypes<DeviceType> AT;
-  typename AT::t_x_array_randomread _x;
-  typename AT::t_v_array_randomread _v;
-  typename AT::t_tagint_1d_randomread _tag;
-  typename AT::t_int_1d_randomread _type;
-  typename AT::t_int_1d_randomread _mask;
-  typename AT::t_imageint_1d_randomread _image;
-  typename AT::t_sp_array_randomread _sp;
-  typename AT::t_x_array _xw;
-  typename AT::t_v_array _vw;
-  typename AT::t_tagint_1d _tagw;
-  typename AT::t_int_1d _typew;
-  typename AT::t_int_1d _maskw;
-  typename AT::t_imageint_1d _imagew;
-  typename AT::t_sp_array _spw;
-
-  typename AT::t_xfloat_2d_um _buf;
-  typename AT::t_int_1d_const _sendlist;
-  typename AT::t_int_1d_const _copylist;
-  int _nlocal,_dim;
-  X_FLOAT _lo,_hi;
-
-  AtomVecSpinKokkos_PackExchangeFunctor(
-      const AtomKokkos* atom,
-      const typename AT::tdual_xfloat_2d buf,
-      typename AT::tdual_int_1d sendlist,
-      typename AT::tdual_int_1d copylist,int nlocal, int dim,
-                X_FLOAT lo, X_FLOAT hi):
-    _x(atom->k_x.view<DeviceType>()),
-    _v(atom->k_v.view<DeviceType>()),
-    _tag(atom->k_tag.view<DeviceType>()),
-    _type(atom->k_type.view<DeviceType>()),
-    _mask(atom->k_mask.view<DeviceType>()),
-    _image(atom->k_image.view<DeviceType>()),
-    _sp(atom->k_sp.view<DeviceType>()),
-    _xw(atom->k_x.view<DeviceType>()),
-    _vw(atom->k_v.view<DeviceType>()),
-    _tagw(atom->k_tag.view<DeviceType>()),
-    _typew(atom->k_type.view<DeviceType>()),
-    _maskw(atom->k_mask.view<DeviceType>()),
-    _imagew(atom->k_image.view<DeviceType>()),
-    _spw(atom->k_sp.view<DeviceType>()),
-    _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()),
-    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
-    const size_t elements = 15;
-    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
-                             buf.template view<DeviceType>().extent(1))/elements;
-
-    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int &mysend) const {
-    const int i = _sendlist(mysend);
-    _buf(mysend,0) = 15;
-    _buf(mysend,1) = _x(i,0);
-    _buf(mysend,2) = _x(i,1);
-    _buf(mysend,3) = _x(i,2);
-    _buf(mysend,4) = _v(i,0);
-    _buf(mysend,5) = _v(i,1);
-    _buf(mysend,6) = _v(i,2);
-    _buf(mysend,7) = d_ubuf(_tag[i]).d;
-    _buf(mysend,8) = d_ubuf(_type[i]).d;
-    _buf(mysend,9) = d_ubuf(_mask[i]).d;
-    _buf(mysend,10) = d_ubuf(_image[i]).d;
-    _buf(mysend,11) = _sp(i,0);
-    _buf(mysend,12) = _sp(i,1);
-    _buf(mysend,13) = _sp(i,2);
-    _buf(mysend,14) = _sp(i,3);
-    const int j = _copylist(mysend);
-
-    if(j>-1) {
-    _xw(i,0) = _x(j,0);
-    _xw(i,1) = _x(j,1);
-    _xw(i,2) = _x(j,2);
-    _vw(i,0) = _v(j,0);
-    _vw(i,1) = _v(j,1);
-    _vw(i,2) = _v(j,2);
-    _tagw(i) = _tag(j);
-    _typew(i) = _type(j);
-    _maskw(i) = _mask(j);
-    _imagew(i) = _image(j);
-    _spw(i,0) = _sp(j,0);
-    _spw(i,1) = _sp(j,1);
-    _spw(i,2) = _sp(j,2);
-    _spw(i,3) = _sp(j,3);
-    }
-  }
-};
-  
-/* ---------------------------------------------------------------------- */
-  
-int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
-                                              DAT::tdual_int_1d k_sendlist,
-                                              DAT::tdual_int_1d k_copylist,
-                                              ExecutionSpace space,int dim,
-                                              X_FLOAT lo,X_FLOAT hi )
-{
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/15) {
-    int newsize = nsend*15/k_buf.view<LMPHostType>().extent(1)+1;
-    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
-  }
-  if(space == Host) {
-    AtomVecSpinKokkos_PackExchangeFunctor<LMPHostType>
-      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
-    Kokkos::parallel_for(nsend,f);
-    return nsend*15;
-  } else {
-    AtomVecSpinKokkos_PackExchangeFunctor<LMPDeviceType>
-      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
-    Kokkos::parallel_for(nsend,f);
-    return nsend*15;
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-  
-int AtomVecSpinKokkos::pack_exchange(int i, double *buf)
-{
-  int m = 1;
-  buf[m++] = h_x(i,0);
-  buf[m++] = h_x(i,1);
-  buf[m++] = h_x(i,2);
-  buf[m++] = h_v(i,0);
-  buf[m++] = h_v(i,1);
-  buf[m++] = h_v(i,2);
-  buf[m++] = ubuf(h_tag(i)).d;
-  buf[m++] = ubuf(h_type(i)).d;
-  buf[m++] = ubuf(h_mask(i)).d;
-  buf[m++] = ubuf(h_image(i)).d;
-  buf[m++] = h_sp(i,0);
-  buf[m++] = h_sp(i,1);
-  buf[m++] = h_sp(i,2);
-  buf[m++] = h_sp(i,3);
-
-  if (atom->nextra_grow)
-    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
-      m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
-
-  buf[0] = m;
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecSpinKokkos_UnpackExchangeFunctor {
-  typedef DeviceType device_type;
-  typedef ArrayTypes<DeviceType> AT;
-  typename AT::t_x_array _x;
-  typename AT::t_v_array _v;
-  typename AT::t_tagint_1d _tag;
-  typename AT::t_int_1d _type;
-  typename AT::t_int_1d _mask;
-  typename AT::t_imageint_1d _image;
-  typename AT::t_sp_array _sp;
-  typename AT::t_xfloat_2d_um _buf;
-  typename AT::t_int_1d _nlocal;
-  int _dim;
-  X_FLOAT _lo,_hi;
-
-  AtomVecSpinKokkos_UnpackExchangeFunctor(
-      const AtomKokkos* atom,
-      const typename AT::tdual_xfloat_2d buf,
-      typename AT::tdual_int_1d nlocal,
-      int dim, X_FLOAT lo, X_FLOAT hi):
-    _x(atom->k_x.view<DeviceType>()),
-    _v(atom->k_v.view<DeviceType>()),
-    _tag(atom->k_tag.view<DeviceType>()),
-    _type(atom->k_type.view<DeviceType>()),
-    _mask(atom->k_mask.view<DeviceType>()),
-    _image(atom->k_image.view<DeviceType>()),
-    _sp(atom->k_sp.view<DeviceType>()),
-    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
-    const size_t elements = 15;
-    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
-
-    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int &myrecv) const {
-    X_FLOAT x = _buf(myrecv,_dim+1);
-    if (x >= _lo && x < _hi) {
-      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
-      _x(i,0) = _buf(myrecv,1);
-      _x(i,1) = _buf(myrecv,2);
-      _x(i,2) = _buf(myrecv,3);
-      _v(i,0) = _buf(myrecv,4);
-      _v(i,1) = _buf(myrecv,5);
-      _v(i,2) = _buf(myrecv,6);
-      _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i;
-      _type[i] = (int) d_ubuf(_buf(myrecv,8)).i;
-      _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i;
-      _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
-      _sp(i,0) = _buf(myrecv,11);
-      _sp(i,1) = _buf(myrecv,12);
-      _sp(i,2) = _buf(myrecv,13);
-      _sp(i,3) = _buf(myrecv,14);
-    }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
-                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
-                                                ExecutionSpace space) {
-  if(space == Host) {
-    k_count.h_view(0) = nlocal;
-    AtomVecSpinKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
-    Kokkos::parallel_for(nrecv/15,f);
-    return k_count.h_view(0);
-  } else {
-    k_count.h_view(0) = nlocal;
-    k_count.modify<LMPHostType>();
-    k_count.sync<LMPDeviceType>();
-    AtomVecSpinKokkos_UnpackExchangeFunctor<LMPDeviceType>
-      f(atomKK,k_buf,k_count,dim,lo,hi);
-    Kokkos::parallel_for(nrecv/15,f);
-    k_count.modify<LMPDeviceType>();
-    k_count.sync<LMPHostType>();
-
-    return k_count.h_view(0);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::unpack_exchange(double *buf)
-{
-  int nlocal = atom->nlocal;
-  if (nlocal == nmax) grow(0);
-  atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
-           MASK_MASK | IMAGE_MASK | SP_MASK);
-
-  int m = 1;
-  h_x(nlocal,0) = buf[m++];
-  h_x(nlocal,1) = buf[m++];
-  h_x(nlocal,2) = buf[m++];
-  h_v(nlocal,0) = buf[m++];
-  h_v(nlocal,1) = buf[m++];
-  h_v(nlocal,2) = buf[m++];
-  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
-  h_type(nlocal) = (int) ubuf(buf[m++]).i;
-  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
-  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
-  h_sp(nlocal,0) = buf[m++];
-  h_sp(nlocal,1) = buf[m++];
-  h_sp(nlocal,2) = buf[m++];
-  h_sp(nlocal,3) = buf[m++];
-
-  if (atom->nextra_grow)
-    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
-      m += modify->fix[atom->extra_grow[iextra]]->
-        unpack_exchange(nlocal,&buf[m]);
-
-  atom->nlocal++;
-  return m;
-}
-
-/* ----------------------------------------------------------------------
-   size of restart data for all atoms owned by this proc
-   include extra data stored by fixes
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::size_restart()
-{
-  int i;
-
-  int nlocal = atom->nlocal;
-  int n = 15 * nlocal;
-
-  if (atom->nextra_restart)
-    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
-      for (i = 0; i < nlocal; i++)
-        n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
-
-  return n;
-}
-
-/* ----------------------------------------------------------------------
-   pack atom I's data for restart file including extra quantities
-   xyz must be 1st 3 values, so that read_restart can test on them
-   molecular types may be negative, but write as positive
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_restart(int i, double *buf)
-{
-  atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
-            MASK_MASK | IMAGE_MASK | SP_MASK);
-
-  int m = 1;
-  buf[m++] = h_x(i,0);
-  buf[m++] = h_x(i,1);
-  buf[m++] = h_x(i,2);
-  buf[m++] = ubuf(h_tag(i)).d;
-  buf[m++] = ubuf(h_type(i)).d;
-  buf[m++] = ubuf(h_mask(i)).d;
-  buf[m++] = ubuf(h_image(i)).d;
-  buf[m++] = h_v(i,0);
-  buf[m++] = h_v(i,1);
-  buf[m++] = h_v(i,2);
-
-  buf[m++] = h_sp(i,0);
-  buf[m++] = h_sp(i,1);
-  buf[m++] = h_sp(i,2);
-  buf[m++] = h_sp(i,3);
-
-  if (atom->nextra_restart)
-    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
-      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
-
-  buf[0] = m;
-  return m;
-}
-
-/* ----------------------------------------------------------------------
-   unpack data for one atom from restart file including extra quantities
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::unpack_restart(double *buf)
-{
-  int nlocal = atom->nlocal;
-  if (nlocal == nmax) {
-    grow(0);
-    if (atom->nextra_store)
-      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
-  }
-
-  atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
-           MASK_MASK | IMAGE_MASK | SP_MASK);
-
-  int m = 1;
-  h_x(nlocal,0) = buf[m++];
-  h_x(nlocal,1) = buf[m++];
-  h_x(nlocal,2) = buf[m++];
-  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
-  h_type(nlocal) = (int) ubuf(buf[m++]).i;
-  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
-  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
-  h_v(nlocal,0) = buf[m++];
-  h_v(nlocal,1) = buf[m++];
-  h_v(nlocal,2) = buf[m++];
-
-  h_sp(nlocal,0) = buf[m++];
-  h_sp(nlocal,1) = buf[m++];
-  h_sp(nlocal,2) = buf[m++];
-  h_sp(nlocal,3) = buf[m++];
-
-  double **extra = atom->extra;
-  if (atom->nextra_store) {
-    int size = static_cast<int> (buf[0]) - m;
-    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
-  }
-
-  atom->nlocal++;
-  return m;
-}
-
-/* ----------------------------------------------------------------------
-   create one atom of itype at coord
-   set other values to defaults
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::create_atom(int itype, double *coord)
-{
-  int nlocal = atom->nlocal;
-  if (nlocal == nmax) {
-    atomKK->modified(Host,ALL_MASK);
-    grow(0);
-  }
-  atomKK->sync(Host,ALL_MASK);
-  atomKK->modified(Host,ALL_MASK);
-
-  tag[nlocal] = 0;
-  type[nlocal] = itype;
-  h_x(nlocal,0) = coord[0];
-  h_x(nlocal,1) = coord[1];
-  h_x(nlocal,2) = coord[2];
-  h_mask[nlocal] = 1;
-  h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) |
-    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
-  h_v(nlocal,0) = 0.0;
-  h_v(nlocal,1) = 0.0;
-  h_v(nlocal,2) = 0.0;
-
-  h_sp(nlocal,0) = 0.0;
-  h_sp(nlocal,1) = 0.0;
-  h_sp(nlocal,2) = 0.0;
-  h_sp(nlocal,3) = 0.0;
-
-  atom->nlocal++;
-}
-
-/* ----------------------------------------------------------------------
-   unpack one line from Atoms section of data file
-   initialize other atom quantities
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::data_atom(double *coord, imageint imagetmp,
-                                    char **values)
-{
-  int nlocal = atom->nlocal;
-  if (nlocal == nmax) grow(0);
-
-  h_tag[nlocal] = utils::inumeric(FLERR,values[0],true,lmp);
-  h_type[nlocal] = utils::inumeric(FLERR,values[1],true,lmp);
-  if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes)
-    error->one(FLERR,"Invalid atom type in Atoms section of data file");
-
-  h_sp(nlocal,3) = utils::numeric(FLERR,values[2],true,lmp);
-  h_sp(nlocal,0) = utils::numeric(FLERR,values[6],true,lmp);
-  h_sp(nlocal,1) = utils::numeric(FLERR,values[7],true,lmp);
-  h_sp(nlocal,2) = utils::numeric(FLERR,values[8],true,lmp);
-  double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] +
-                          sp[nlocal][1]*sp[nlocal][1] +
-                          sp[nlocal][2]*sp[nlocal][2]);
-  h_sp(nlocal,0) *= inorm;
-  h_sp(nlocal,1) *= inorm;
-  h_sp(nlocal,2) *= inorm;
-
-  h_x(nlocal,0) = coord[0];
-  h_x(nlocal,1) = coord[1];
-  h_x(nlocal,2) = coord[2];
-
-  h_image[nlocal] = imagetmp;
-
-  h_mask[nlocal] = 1;
-  h_v(nlocal,0) = 0.0;
-  h_v(nlocal,1) = 0.0;
-  h_v(nlocal,2) = 0.0;
-
-  atomKK->modified(Host,ALL_MASK);
-
-  atom->nlocal++;
-}
-
-/* ----------------------------------------------------------------------
-   unpack hybrid quantities from one line in Atoms section of data file
-   initialize other atom quantities for this sub-style
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::data_atom_hybrid(int nlocal, char **values)
-{
-  h_sp(nlocal,3) = utils::numeric(FLERR,values[0],true,lmp);
-  h_sp(nlocal,0) = utils::numeric(FLERR,values[1],true,lmp);
-  h_sp(nlocal,1) = utils::numeric(FLERR,values[2],true,lmp);
-  h_sp(nlocal,2) = utils::numeric(FLERR,values[3],true,lmp);
-  double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] +
-                          sp[nlocal][1]*sp[nlocal][1] +
-                          sp[nlocal][2]*sp[nlocal][2]);
-  sp[nlocal][0] *= inorm;
-  sp[nlocal][1] *= inorm;
-  sp[nlocal][2] *= inorm;
-
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   pack atom info for data file including 3 image flags
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::pack_data(double **buf)
-{
-  int nlocal = atom->nlocal;
-  for (int i = 0; i < nlocal; i++) {
-    buf[i][0] = h_tag[i];
-    buf[i][1] = h_type[i];
-    buf[i][2] = h_sp(i,0);
-    buf[i][3] = h_x(i,0);
-    buf[i][4] = h_x(i,1);
-    buf[i][5] = h_x(i,2);
-    buf[i][2] = h_sp(i,1);
-    buf[i][2] = h_sp(i,2);
-    buf[i][2] = h_sp(i,3);
-    buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX;
-    buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX;
-    buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pack hybrid atom info for data file
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::pack_data_hybrid(int i, double *buf)
-{
-  buf[0] = h_sp(i,3);
-  buf[1] = h_sp(i,0);
-  buf[2] = h_sp(i,1);
-  buf[3] = h_sp(i,2);
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   write atom info to data file including 3 image flags
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::write_data(FILE *fp, int n, double **buf)
-{
-  for (int i = 0; i < n; i++)
-    fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
-            (int) buf[i][0],(int) buf[i][1],buf[i][2],buf[i][3],buf[i][4],
-            buf[i][5],(int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
-}
-
-/* ----------------------------------------------------------------------
-   write hybrid atom info to data file
-------------------------------------------------------------------------- */
-
-int AtomVecSpinKokkos::write_data_hybrid(FILE *fp, double *buf)
-{
-  fprintf(fp," %-1.16e %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2],buf[3]);
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   return # of bytes of allocated memory
-------------------------------------------------------------------------- */
-
-bigint AtomVecSpinKokkos::memory_usage()
-{
-  bigint bytes = 0;
-
-  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
-  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
-  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
-  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
-  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
-  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
-  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3);
-
-  if (atom->memcheck("sp")) bytes += memory->usage(sp,nmax,4);
-  if (atom->memcheck("fm")) bytes += memory->usage(fm,nmax*comm->nthreads,3);
-  if (atom->memcheck("fm_long")) bytes += memory->usage(fm_long,nmax*comm->nthreads,3);
-
-  return bytes;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask)
-{
-  if (space == Device) {
-    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
-    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
-    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
-    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
-    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
-    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
-    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
-    if (mask & SP_MASK) atomKK->k_sp.sync<LMPDeviceType>();
-    if (mask & FM_MASK) atomKK->k_fm.sync<LMPDeviceType>();
-    if (mask & FML_MASK) atomKK->k_fm_long.sync<LMPDeviceType>();
-  } else {
-    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
-    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
-    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
-    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
-    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
-    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
-    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
-    if (mask & SP_MASK) atomKK->k_sp.sync<LMPHostType>();
-    if (mask & FM_MASK) atomKK->k_fm.sync<LMPHostType>();
-    if (mask & FML_MASK) atomKK->k_fm_long.sync<LMPHostType>();
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask)
-{
-  if (space == Device) {
-    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
-    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
-    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
-    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
-    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
-    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
-    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
-    if (mask & SP_MASK) atomKK->k_sp.modify<LMPDeviceType>();
-    if (mask & FM_MASK) atomKK->k_fm.modify<LMPDeviceType>();
-    if (mask & FML_MASK) atomKK->k_fm_long.modify<LMPDeviceType>();
-  } else {
-    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
-    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
-    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
-    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
-    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
-    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
-    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
-    if (mask & SP_MASK) atomKK->k_sp.modify<LMPHostType>();
-    if (mask & FM_MASK) atomKK->k_fm.modify<LMPHostType>();
-    if (mask & FML_MASK) atomKK->k_fm_long.modify<LMPHostType>();
-  }
-}
-
-void AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
-{
-  if (space == Device) {
-    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
-    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
-    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
-    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
-    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
-    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
-    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
-    if ((mask & SP_MASK) && atomKK->k_sp.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_sp_array>(atomKK->k_sp,space);
-    if ((mask & FM_MASK) && atomKK->k_sp.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_fm_array>(atomKK->k_fm,space);
-    if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_fm_long_array>(atomKK->k_fm_long,space);
-  } else {
-    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
-    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
-    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
-    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
-    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
-    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
-    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
-    if ((mask & SP_MASK) && atomKK->k_sp.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_sp_array>(atomKK->k_sp,space);
-    if ((mask & FM_MASK) && atomKK->k_fm.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_fm_array>(atomKK->k_fm,space);
-    if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_fm_long_array>(atomKK->k_fm_long,space);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   clear all forces (mech and mag)
-------------------------------------------------------------------------- */
-
-void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes)
-{
-  memset(&atom->f[0][0],0,3*nbytes);
-  memset(&atom->fm[0][0],0,3*nbytes);
-  memset(&atom->fm_long[0][0],0,3*nbytes);
-}
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h
deleted file mode 100644
index d439424076..0000000000
--- a/src/KOKKOS/atom_vec_spin_kokkos.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef ATOM_CLASS
-
-AtomStyle(spin/kk,AtomVecSpinKokkos)
-AtomStyle(spin/kk/device,AtomVecSpinKokkos)
-AtomStyle(spin/kk/host,AtomVecSpinKokkos)
-
-#else
-
-#ifndef LMP_ATOM_VEC_SPIN_KOKKOS_H
-#define LMP_ATOM_VEC_SPIN_KOKKOS_H
-
-#include "atom_vec_kokkos.h"
-#include "kokkos_type.h"
-
-namespace LAMMPS_NS {
-
-class AtomVecSpinKokkos : public AtomVecKokkos {
- public:
-  AtomVecSpinKokkos(class LAMMPS *);
-  void grow(int);
-  void copy(int, int, int);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
-  int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
-  int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
-  int size_restart();
-  int pack_restart(int, double *);
-  int unpack_restart(double *);
-  void create_atom(int, double *);
-  void data_atom(double *, imageint, char **);
-  int data_atom_hybrid(int, char **);
-  void pack_data(double **);
-  int pack_data_hybrid(int, double *);
-  void write_data(FILE *, int, double **);
-  int write_data_hybrid(FILE *, double *);
-  bigint memory_usage();
-  
-  // clear magnetic and mechanic forces
-
-  void force_clear(int, size_t);
-
-  void grow_reset();
-  // input lists to be checked
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
-                         int pbc_flag, int *pbc, ExecutionSpace space);
-  void unpack_border_kokkos(const int &n, const int &nfirst,
-                            const DAT::tdual_xfloat_2d &buf,
-                            ExecutionSpace space);
-  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
-                           DAT::tdual_int_1d k_sendlist,
-                           DAT::tdual_int_1d k_copylist,
-                           ExecutionSpace space, int dim,
-                           X_FLOAT lo, X_FLOAT hi);
-  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
-                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
-                             ExecutionSpace space);
-
-  void sync(ExecutionSpace space, unsigned int mask);
-  void modified(ExecutionSpace space, unsigned int mask);
-  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
-
- protected:
-  tagint *tag;
-  int *type,*mask;
-  imageint *image;
-  double **x,**v,**f;           // lattice quantities
-
-                                // spin quantities
-  double **sp;                  // sp[i][0-2] direction of the spin i
-                                // sp[i][3] atomic magnetic moment of the spin i
-  double **fm;                  // fm[i][0-2] direction of magnetic precession
-  double **fm_long;             // storage of long-range spin prec. components
-
-  DAT::t_tagint_1d d_tag;
-  HAT::t_tagint_1d h_tag;
-
-  DAT::t_int_1d d_type, d_mask;
-  HAT::t_int_1d h_type, h_mask;
-
-  DAT::t_imageint_1d d_image;
-  HAT::t_imageint_1d h_image;
-
-  DAT::t_x_array d_x;
-  DAT::t_v_array d_v;
-  DAT::t_f_array d_f;
-
-  DAT::t_sp_array d_sp;
-  DAT::t_fm_array d_fm;
-  DAT::t_fm_long_array d_fm_long;
-
-  HAT::t_sp_array h_sp;
-  HAT::t_fm_array h_fm;
-  HAT::t_fm_long_array h_fm_long;
-
-  DAT::tdual_int_1d k_count;
-};
-
-}
-
-#endif
-#endif
-
-/* ERROR/WARNING messages:
-
-E: Per-processor system is too big
-
-The number of owned atoms plus ghost atoms on a single
-processor must fit in 32-bit integer.
-
-E: Invalid atom type in Atoms section of data file
-
-Atom types must range from 1 to specified # of types.
-
-*/
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index 7a575ecf28..5930a9e207 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -758,39 +758,6 @@ typedef tdual_virial_array::t_dev_um t_virial_array_um;
 typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
 typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
 
-// Spin Types
-
-//3d SP_FLOAT array n*4
-#ifdef LMP_KOKKOS_NO_LEGACY
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutLeft, LMPDeviceType> tdual_sp_array;
-#else
-typedef Kokkos::DualView<X_FLOAT*[4], Kokkos::LayoutRight, LMPDeviceType> tdual_sp_array;
-#endif
-typedef tdual_sp_array::t_dev t_sp_array;
-typedef tdual_sp_array::t_dev_const t_sp_array_const;
-typedef tdual_sp_array::t_dev_um t_sp_array_um;
-typedef tdual_sp_array::t_dev_const_um t_sp_array_const_um;
-typedef tdual_sp_array::t_dev_const_randomread t_sp_array_randomread;
-
-//3d FM_FLOAT array n*3
-
-typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_array;
-typedef tdual_fm_array::t_dev t_fm_array;
-typedef tdual_fm_array::t_dev_const t_fm_array_const;
-typedef tdual_fm_array::t_dev_um t_fm_array_um;
-typedef tdual_fm_array::t_dev_const_um t_fm_array_const_um;
-typedef tdual_fm_array::t_dev_const_randomread t_fm_array_randomread;
-
-//3d FML_FLOAT array n*3
-
-typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_long_array;
-typedef tdual_fm_long_array::t_dev t_fm_long_array;
-typedef tdual_fm_long_array::t_dev_const t_fm_long_array_const;
-typedef tdual_fm_long_array::t_dev_um t_fm_long_array_um;
-typedef tdual_fm_long_array::t_dev_const_um t_fm_long_array_const_um;
-typedef tdual_fm_long_array::t_dev_const_randomread t_fm_long_array_randomread;
-
-
 //Energy Types
 //1d E_FLOAT array n
 
@@ -1027,33 +994,6 @@ typedef tdual_virial_array::t_host_um t_virial_array_um;
 typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
 typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
 
-// Spin types
-
-//2d X_FLOAT array n*3
-typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_sp_array;
-typedef tdual_sp_array::t_host t_sp_array;
-typedef tdual_sp_array::t_host_const t_sp_array_const;
-typedef tdual_sp_array::t_host_um t_sp_array_um;
-typedef tdual_sp_array::t_host_const_um t_sp_array_const_um;
-typedef tdual_sp_array::t_host_const_randomread t_sp_array_randomread;
-
-//2d F_FLOAT array n*3
-typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_array;
-//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
-typedef tdual_fm_array::t_host t_fm_array;
-typedef tdual_fm_array::t_host_const t_fm_array_const;
-typedef tdual_fm_array::t_host_um t_fm_array_um;
-typedef tdual_fm_array::t_host_const_um t_fm_array_const_um;
-typedef tdual_fm_array::t_host_const_randomread t_fm_array_randomread;
-
-//2d F_FLOAT array n*3
-typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_fm_long_array;
-//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
-typedef tdual_fm_long_array::t_host t_fm_long_array;
-typedef tdual_fm_long_array::t_host_const t_fm_long_array_const;
-typedef tdual_fm_long_array::t_host_um t_fm_long_array_um;
-typedef tdual_fm_long_array::t_host_const_um t_fm_long_array_const_um;
-typedef tdual_fm_long_array::t_host_const_randomread t_fm_long_array_randomread;
 
 
 //Energy Types
diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp
index 41bb1a7755..7eb81e7a03 100644
--- a/src/SPIN/pair_spin_dipole_cut.cpp
+++ b/src/SPIN/pair_spin_dipole_cut.cpp
@@ -233,36 +233,44 @@ void PairSpinDipoleCut::compute(int eflag, int vflag)
 
       local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype];
 
+      // compute dipolar interaction
+      
       if (rsq < local_cut2) {
         r2inv = 1.0/rsq;
         r3inv = r2inv*rinv;
 
         compute_dipolar(i,j,eij,fmi,spi,spj,r3inv);
-        if (lattice_flag) compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv);
-      }
+        
+        if (lattice_flag) 
+          compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv);
 
-      // force accumulation
+        if (eflag) {
+          if (rsq <= local_cut2) {
+            evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+            evdwl *= 0.5*hbar;
+            emag[i] += evdwl;
+          }
+        } else evdwl = 0.0;
 
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (eflag) {
-        if (rsq <= local_cut2) {
-          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-          evdwl *= 0.5*hbar;
-          emag[i] += evdwl;
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
         }
-      } else evdwl = 0.0;
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
 
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
+      }
     }
   }
+  
+  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
@@ -391,7 +399,7 @@ void PairSpinDipoleCut::compute_dipolar_mech(int /* i */, int /* j */, double ei
   sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2];
 
   bij = sisj - 5.0*sieij*sjeij;
-  pre = 3.0*mub2mu0*gigjri4;
+  pre = 0.5*3.0*mub2mu0*gigjri4;
 
   fi[0] -= pre * (eij[0] * bij + (sjeij*spi[0] + sieij*spj[0]));
   fi[1] -= pre * (eij[1] * bij + (sjeij*spi[1] + sieij*spj[1]));
diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp
index 85b3c1d7bb..e90a43d1cf 100644
--- a/src/SPIN/pair_spin_dipole_long.cpp
+++ b/src/SPIN/pair_spin_dipole_long.cpp
@@ -281,32 +281,37 @@ void PairSpinDipoleLong::compute(int eflag, int vflag)
         bij[3] = (5.0*bij[2] + pre3*expm2) * r2inv;
 
         compute_long(i,j,eij,bij,fmi,spi,spj);
-        compute_long_mech(i,j,eij,bij,fmi,spi,spj);
-      }
+        if (lattice_flag)
+          compute_long_mech(i,j,eij,bij,fmi,spi,spj);
 
-      // force accumulation
+        if (eflag) {
+          if (rsq <= local_cut2) {
+            evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+            evdwl *= 0.5*hbar;
+            emag[i] += evdwl;
+          }
+        } else evdwl = 0.0;
 
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (eflag) {
-        if (rsq <= local_cut2) {
-          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-          evdwl *= 0.5*hbar;
-          emag[i] += evdwl;
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
         }
-      } else evdwl = 0.0;
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
 
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
 
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
-
+      }
     }
   }
+  
+  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
@@ -373,7 +378,6 @@ void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3])
     spi[3] = sp[ii][3];
     jlist = firstneigh[ii];
     jnum = numneigh[ii];
-    //itype = type[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
@@ -459,7 +463,7 @@ void PairSpinDipoleLong::compute_long_mech(int /* i */, int /* j */, double eij[
   double g1,g2,g1b2_g2b3,gigj,pre;
 
   gigj = spi[3] * spj[3];
-  pre = gigj*mub2mu0;
+  pre = 0.5 * gigj*mub2mu0;
   sisj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2];
   sieij = spi[0]*eij[0] + spi[1]*eij[1] + spi[2]*eij[2];
   sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2];
diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp
index e2ddd708df..8d43a3a870 100644
--- a/src/SPIN/pair_spin_dmi.cpp
+++ b/src/SPIN/pair_spin_dmi.cpp
@@ -244,31 +244,36 @@ void PairSpinDmi::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_dmi(i,j,eij,fmi,spj);
-        if (lattice_flag) {
+        
+        if (lattice_flag)
           compute_dmi_mech(i,j,rsq,eij,fi,spi,spj);
-        }
+
+        if (eflag) {
+          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+          evdwl *= 0.5*hbar;
+          emag[i] += evdwl;
+        } else evdwl = 0.0;
+        
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+          if (newton_pair || j < nlocal) {
+            f[j][0] -= fi[0];
+            f[j][1] -= fi[1];
+            f[j][2] -= fi[2];
+          }
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
+
+
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
       }
-
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (eflag) {
-        evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-        evdwl *= 0.5*hbar;
-        emag[i] += evdwl;
-      } else evdwl = 0.0;
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
-
 }
 
 /* ----------------------------------------------------------------------
@@ -405,9 +410,9 @@ void PairSpinDmi::compute_dmi_mech(int i, int j, double rsq, double /*eij*/[3],
   cdmy = (dmiz*csx - dmix*csz);
   cdmz = (dmix*csy - dmiy*csz);
 
-  fi[0] += irij*cdmx;
-  fi[1] += irij*cdmy;
-  fi[2] += irij*cdmz;
+  fi[0] += 0.5*irij*cdmx;
+  fi[1] += 0.5*irij*cdmy;
+  fi[2] += 0.5*irij*cdmz;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 4c6c3936cf..36f3dbcf5e 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -22,19 +22,16 @@
 ------------------------------------------------------------------------- */
 
 #include "pair_spin_exchange_biquadratic.h"
-#include <mpi.h>
-#include <cmath>
-#include <cstring>
+
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
-#include "fix.h"
 #include "force.h"
-#include "neigh_list.h"
 #include "memory.h"
-#include "modify.h"
-#include "update.h"
-#include "utils.h"
+#include "neigh_list.h"
+
+#include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
@@ -76,7 +73,7 @@ void PairSpinExchangeBiquadratic::settings(int narg, char **arg)
 
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
-  cut_spin_exchange_global = force->numeric(FLERR,arg[0]);
+  cut_spin_exchange_global = utils::numeric(FLERR,arg[0],false,lmp);
 
   // reset cutoffs that have been explicitly set
 
@@ -106,19 +103,19 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   int ilo,ihi,jlo,jhi;
-  force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
-  force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
+  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
+  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
 
   // get exchange arguments from input command
 
   int iarg = 10;
-  const double rc = force->numeric(FLERR,arg[3]);
-  const double j1 = force->numeric(FLERR,arg[4]);
-  const double j2 = force->numeric(FLERR,arg[5]);
-  const double j3 = force->numeric(FLERR,arg[6]);
-  const double k1 = force->numeric(FLERR,arg[7]);
-  const double k2 = force->numeric(FLERR,arg[8]);
-  const double k3 = force->numeric(FLERR,arg[9]);
+  const double rc = utils::numeric(FLERR,arg[3],false,lmp);
+  const double j1 = utils::numeric(FLERR,arg[4],false,lmp);
+  const double j2 = utils::numeric(FLERR,arg[5],false,lmp);
+  const double j3 = utils::numeric(FLERR,arg[6],false,lmp);
+  const double k1 = utils::numeric(FLERR,arg[7],false,lmp);
+  const double k2 = utils::numeric(FLERR,arg[8],false,lmp);
+  const double k3 = utils::numeric(FLERR,arg[9],false,lmp);
 
   // read energy offset flag if specified
 
diff --git a/src/SPIN/pair_spin_magelec.cpp b/src/SPIN/pair_spin_magelec.cpp
index 849590bad2..2a672416b9 100644
--- a/src/SPIN/pair_spin_magelec.cpp
+++ b/src/SPIN/pair_spin_magelec.cpp
@@ -237,31 +237,35 @@ void PairSpinMagelec::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_magelec(i,j,eij,fmi,spj);
-        if (lattice_flag) {
+        
+        if (lattice_flag)
           compute_magelec_mech(i,j,fi,spi,spj);
+
+        if (eflag) {
+          evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
+          evdwl *= 0.5*hbar;
+          emag[i] += evdwl;
+        } else evdwl = 0.0;
+        
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
         }
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
+
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
       }
-
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (eflag) {
-        evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-        evdwl *= 0.5*hbar;
-        emag[i] += evdwl;
-      } else evdwl = 0.0;
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
-
 }
 
 /* ----------------------------------------------------------------------
@@ -400,9 +404,9 @@ void PairSpinMagelec::compute_magelec_mech(int i, int j, double fi[3], double sp
   meiy *= ME_mech[itype][jtype];
   meiz *= ME_mech[itype][jtype];
 
-  fi[0] += (meiy*vz - meiz*vy);
-  fi[1] += (meiz*vx - meix*vz);
-  fi[2] += (meix*vy - meiy*vx);
+  fi[0] += 0.5*(meiy*vz - meiz*vy);
+  fi[1] += 0.5*(meiz*vx - meix*vz);
+  fi[2] += 0.5*(meix*vy - meiy*vx);
 
 }
 
diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp
index c09b5ac191..5c05bef525 100644
--- a/src/SPIN/pair_spin_neel.cpp
+++ b/src/SPIN/pair_spin_neel.cpp
@@ -246,31 +246,33 @@ void PairSpinNeel::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_neel(i,j,rsq,eij,fmi,spi,spj);
-        if (lattice_flag) {
+        if (lattice_flag)
           compute_neel_mech(i,j,rsq,eij,fi,spi,spj);
+
+        f[i][0] += fi[0];
+        f[i][1] += fi[1];
+        f[i][2] += fi[2];
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
         }
+        fm[i][0] += fmi[0];
+        fm[i][1] += fmi[1];
+        fm[i][2] += fmi[2];
+
+        if (eflag) {
+          evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj);
+          emag[i] += evdwl;
+        } else evdwl = 0.0;
+
+        if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
+            evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
       }
-
-      f[i][0] += fi[0];
-      f[i][1] += fi[1];
-      f[i][2] += fi[2];
-      fm[i][0] += fmi[0];
-      fm[i][1] += fmi[1];
-      fm[i][2] += fmi[2];
-
-      if (eflag) {
-        evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj);
-        // evdwl *= 0.5*hbar;
-        emag[i] += evdwl;
-      } else evdwl = 0.0;
-
-      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
-          evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
-
 }
 
 /* ----------------------------------------------------------------------
@@ -563,9 +565,9 @@ void PairSpinNeel::compute_neel_mech(int i, int j, double rsq, double eij[3], do
 
   // adding three contributions
 
-  fi[0] = pdx + pq1x + pq2x;
-  fi[1] = pdy + pq1y + pq2y;
-  fi[2] = pdz + pq1z + pq2z;
+  fi[0] = 0.5*(pdx + pq1x + pq2x);
+  fi[1] = 0.5*(pdy + pq1y + pq2y);
+  fi[2] = 0.5*(pdz + pq1z + pq2z);
 }
 
 /* ---------------------------------------------------------------------- */

From 2825abb0284a164b368d1dda18a62140c807b000 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 17:13:54 -0600
Subject: [PATCH 13/44] Improved scripts validation problems

---
 .../validation_damped_exchange/run-test-exchange.sh           | 4 ++--
 examples/SPIN/test_problems/validation_nve/run-test-nve.sh    | 2 +-
 examples/SPIN/test_problems/validation_nvt/plot_nvt.py        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh
index 599730fe7b..bd878a52de 100755
--- a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh
+++ b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh
@@ -13,7 +13,7 @@ en="$(echo "$en-$in" | bc -l)"
 tail -n +$in log.lammps | head -n $en > res_lammps.dat
 
 # compute Langevin
-python3 -m llg_exchange.py > res_llg.dat
+python3 llg_exchange.py > res_llg.dat
 
 # plot results
-python3 -m plot_precession.py res_lammps.dat res_llg.dat
+python3 plot_precession.py res_lammps.dat res_llg.dat
diff --git a/examples/SPIN/test_problems/validation_nve/run-test-nve.sh b/examples/SPIN/test_problems/validation_nve/run-test-nve.sh
index 441e7cf46d..18cedd9503 100755
--- a/examples/SPIN/test_problems/validation_nve/run-test-nve.sh
+++ b/examples/SPIN/test_problems/validation_nve/run-test-nve.sh
@@ -13,4 +13,4 @@ en="$(echo "$en-$in" | bc -l)"
 tail -n +$in log.lammps | head -n $en > res_lammps.dat
 
 # plot results
-python3 -m plot_nve.py res_lammps.dat res_llg.dat
+python3 plot_nve.py res_lammps.dat res_llg.dat
diff --git a/examples/SPIN/test_problems/validation_nvt/plot_nvt.py b/examples/SPIN/test_problems/validation_nvt/plot_nvt.py
index 06c48b4c28..4109d60245 100755
--- a/examples/SPIN/test_problems/validation_nvt/plot_nvt.py
+++ b/examples/SPIN/test_problems/validation_nvt/plot_nvt.py
@@ -39,5 +39,5 @@ plt.xlabel('Time (in ps)')
 plt.legend()
 plt.show()
 
-fig.savefig(os.path.join(os.getcwd(), "nve_spin_lattice.pdf"), bbox_inches="tight")
+fig.savefig(os.path.join(os.getcwd(), "nvt_spin_lattice.pdf"), bbox_inches="tight")
 plt.close(fig)

From 2d7494186c053e96d85440dd4892566072f9d90e Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 19:37:24 -0600
Subject: [PATCH 14/44] rerun all validation tests (modified one)

---
 .../test-spin-precession.in                   | 18 ++++++++-------
 .../validation_damped_exchange/two_spins.data | 22 -------------------
 .../validation_nvt/in.spin.nvt_lattice        |  2 +-
 .../validation_nvt/in.spin.nvt_spin           |  2 +-
 src/SPIN/pair_spin_exchange.cpp               |  4 ----
 5 files changed, 12 insertions(+), 36 deletions(-)
 delete mode 100644 examples/SPIN/test_problems/validation_damped_exchange/two_spins.data

diff --git a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
index 0ca49364d2..86da20e6f9 100644
--- a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
+++ b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
@@ -5,22 +5,24 @@ atom_style      spin
 atom_modify     map array
 boundary        f f f 
 
-read_data	two_spins.data
+atom_modify 	map array 
+lattice 	sc 3.0
+region 		box block 0 2 0 1 0 1 
+create_box 	1 box
+create_atoms 	1 box
+
+mass		1 55.845
+set 		atom 1 spin 2.0 1.0 0.0 0.0
+set 		atom 2 spin 2.0 0.0 1.0 0.0
 
 pair_style      spin/exchange 3.1
 pair_coeff	* * exchange 3.1 11.254 0.0 1.0
 
-group bead      type 1  
- 
-variable        H equal 0.0
-variable        Kan equal 0.0
 variable        Temperature equal 0.0 
 variable        RUN equal 30000
 
 fix             1 all nve/spin lattice no
-fix             2 all precession/spin zeeman ${H} 0.0 0.0 1.0 anisotropy ${Kan} 0.0 0.0 1.0
-fix_modify      2 energy yes
-fix             3 all langevin/spin ${Temperature} 0.01 12345
+fix             2 all langevin/spin ${Temperature} 0.01 12345
 
 compute		out_mag    all spin
 compute		out_pe     all pe
diff --git a/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data b/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data
deleted file mode 100644
index 013f813751..0000000000
--- a/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data
+++ /dev/null
@@ -1,22 +0,0 @@
-LAMMPS data file via write_data, version 19 Sep 2019, timestep = 0
-
-2 atoms
-1 atom types
-
-0.0 6.0 xlo xhi
-0.0 3.0 ylo yhi
-0.0 3.0 zlo zhi
-
-Masses
-
-1 1
-
-Atoms # spin
-
-1 1 2.0 0.0 0.0 0.0 1.0 0.0 0.0 0 0 0
-2 1 2.0 3.0 0.0 0.0 0.0 1.0 0.0 0 0 0
-
-Velocities
-
-1 0.0 0.0 0.0
-2 0.0 0.0 0.0
diff --git a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice
index 1d63f01d43..2375c0ff8d 100644
--- a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice
+++ b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice
@@ -30,7 +30,7 @@ neighbor 	0.1 bin
 neigh_modify 	every 10 check yes delay 20
 
 fix 		1 all precession/spin zeeman 0.0 0.0 0.0 1.0
-fix             2 all langevin 200.0 200.0 10.0 48279
+fix             2 all langevin 200.0 200.0 1.0 48279
 fix 		3 all langevin/spin 0.0 0.00001 321
 fix 		4 all nve/spin lattice moving
 timestep	0.001
diff --git a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin
index 435e877bdf..6b65df7109 100644
--- a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin
+++ b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin
@@ -29,7 +29,7 @@ neighbor 	0.1 bin
 neigh_modify 	every 10 check yes delay 20
 
 fix 		1 all precession/spin zeeman 0.0 0.0 0.0 1.0
-fix 		2 all langevin/spin 200.0 0.1 321
+fix 		2 all langevin/spin 200.0 0.01 321
 fix 		3 all nve/spin lattice moving
 timestep	0.001
 
diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp
index e6b6db375f..e35408e9ec 100644
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@@ -426,10 +426,6 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
   fi[0] -= 0.5*fx;
   fi[1] -= 0.5*fy;
   fi[2] -= 0.5*fz;
-  // fi[0] -= fx;
-  // fi[1] -= fy;
-  // fi[2] -= fz;
-
 }
 
 /* ----------------------------------------------------------------------

From 3147dd850c53be305c776b1b5ff76fce7c1b4b0f Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 20:01:34 -0600
Subject: [PATCH 15/44] adding corrections to doc page

---
 doc/src/Commands_pair.rst      | 1 +
 doc/src/Packages_details.rst   | 2 ++
 doc/src/pair_spin_exchange.rst | 1 +
 doc/src/pair_style.rst         | 1 +
 4 files changed, 5 insertions(+)

diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 888a445daa..4f3b164c98 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -240,6 +240,7 @@ OPT.
    * :doc:`spin/dipole/long <pair_spin_dipole>`
    * :doc:`spin/dmi <pair_spin_dmi>`
    * :doc:`spin/exchange <pair_spin_exchange>`
+   * :doc:`spin/exchange/biquadratic <pair_spin_exchange>`
    * :doc:`spin/magelec <pair_spin_magelec>`
    * :doc:`spin/neel <pair_spin_neel>`
    * :doc:`srp <pair_srp>`
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index 1beeeff5b4..d9e1e31470 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -1036,9 +1036,11 @@ the usual manner via MD.  Various pair, fix, and compute styles.
 * :doc:`pair_style spin/dipole/long <pair_spin_dipole>`
 * :doc:`pair_style spin/dmi <pair_spin_dmi>`
 * :doc:`pair_style spin/exchange <pair_spin_exchange>`
+* :doc:`pair_style spin/exchange/biquadratic <pair_spin_exchange>`
 * :doc:`pair_style spin/magelec <pair_spin_magelec>`
 * :doc:`pair_style spin/neel <pair_spin_neel>`
 * :doc:`fix nve/spin <fix_nve_spin>`
+* :doc:`fix langevin/spin <fix_langevin_spin>`
 * :doc:`fix precession/spin <fix_precession_spin>`
 * :doc:`compute spin <compute_spin>`
 * :doc:`neb/spin <neb_spin>`
diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst
index 85cf6d3aa8..38e59eed19 100644
--- a/doc/src/pair_spin_exchange.rst
+++ b/doc/src/pair_spin_exchange.rst
@@ -1,4 +1,5 @@
 .. index:: pair_style spin/exchange
+.. index:: pair_style spin/exchange/biquadratic
 
 pair_style spin/exchange command
 ================================
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index 4feaeacad0..2a6c81c0f8 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -304,6 +304,7 @@ accelerated styles exist.
 * :doc:`spin/dipole/long <pair_spin_dipole>` -
 * :doc:`spin/dmi <pair_spin_dmi>` -
 * :doc:`spin/exchange <pair_spin_exchange>` -
+* :doc:`spin/exchange/biquadratic <pair_spin_exchange>` -
 * :doc:`spin/magelec <pair_spin_magelec>` -
 * :doc:`spin/neel <pair_spin_neel>` -
 * :doc:`srp <pair_srp>` -

From 4baf60ffd1393e063b9332c3c0cfca94d986da7a Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 20:47:07 -0600
Subject: [PATCH 16/44] adding examples of the biquadratic pair_style and
 offset option

---
 examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp | 2 +-
 examples/SPIN/iron/in.spin.iron             | 2 +-
 examples/SPIN/iron/in.spin.iron_cubic       | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp
index 2bfa8393f3..6429cec349 100644
--- a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp
+++ b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp
@@ -26,7 +26,7 @@ velocity 	all create 100 4928459 rot yes dist gaussian
 #pair_style 	hybrid/overlay eam/alloy spin/exchange 4.0 spin/neel 4.0
 pair_style 	hybrid/overlay eam/alloy spin/exchange 4.0
 pair_coeff 	* * eam/alloy Co_PurjaPun_2012.eam.alloy Co
-pair_coeff 	* * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.064568567
+pair_coeff 	* * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.0645 offset yes
 #pair_coeff 	* * spin/neel neel 4.0 0.0048 0.234 1.168 2.6905 0.705 0.652  
 
 neighbor 	0.1 bin
diff --git a/examples/SPIN/iron/in.spin.iron b/examples/SPIN/iron/in.spin.iron
index 58c0537af7..f678d39f56 100644
--- a/examples/SPIN/iron/in.spin.iron
+++ b/examples/SPIN/iron/in.spin.iron
@@ -25,7 +25,7 @@ velocity 	all create 100 4928459 rot yes dist gaussian
 
 pair_style 	hybrid/overlay eam/alloy spin/exchange 3.5
 pair_coeff 	* * eam/alloy Fe_Mishin2006.eam.alloy Fe
-pair_coeff 	* * spin/exchange exchange 3.4 0.02726 0.2171 1.841
+pair_coeff 	* * spin/exchange exchange 3.4 0.02726 0.2171 1.841 offset yes
 
 neighbor 	0.1 bin
 neigh_modify 	every 10 check yes delay 20
diff --git a/examples/SPIN/iron/in.spin.iron_cubic b/examples/SPIN/iron/in.spin.iron_cubic
index 30a3e0e97c..35011e796f 100644
--- a/examples/SPIN/iron/in.spin.iron_cubic
+++ b/examples/SPIN/iron/in.spin.iron_cubic
@@ -21,9 +21,9 @@ mass		1 55.845
 set 		group all spin 2.2 -1.0 0.0 0.0
 velocity 	all create 100 4928459 rot yes dist gaussian
 
-pair_style 	hybrid/overlay eam/alloy spin/exchange 3.5
+pair_style 	hybrid/overlay eam/alloy spin/exchange/biquadratic 3.5
 pair_coeff 	* * eam/alloy Fe_Mishin2006.eam.alloy Fe
-pair_coeff 	* * spin/exchange exchange 3.4 0.02726 0.2171 1.841
+pair_coeff 	* * spin/exchange/biquadratic biquadratic 3.4 0.02726 0.2171 1.841 0.0 0.0 2.0 offset yes
 neighbor 	0.1 bin
 neigh_modify 	every 10 check yes delay 20
 

From e3b8563ed9785455a7211e9933cd5daa6ff88d7c Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 5 Oct 2020 21:28:26 -0600
Subject: [PATCH 17/44] correcting spelling errors

---
 doc/src/pair_spin_exchange.rst              | 2 +-
 doc/utils/sphinx-config/false_positives.txt | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst
index 38e59eed19..72c416ac72 100644
--- a/doc/src/pair_spin_exchange.rst
+++ b/doc/src/pair_spin_exchange.rst
@@ -132,7 +132,7 @@ for the *spin/exchange/biquadratic* pair style.
 Note that :math:`R_c` is the radius cutoff of the considered exchange 
 interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients 
 performing the parameterization of the function :math:`J(r_{ij})` defined 
-above (in the *biquadratic* ase, :math:`a_j`, :math:`b_j`, :math:`d_j` and 
+above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and 
 :math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})`
 and :math:`K(r_{ij})` respectively).
 
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index b276933a88..a06f72fde5 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -240,6 +240,7 @@ bigint
 Bij
 bilayer
 bilayers
+biquadratic
 binsize
 binstyle
 binutils

From 73b2ad0acce681b5203ffe8c67d7f8f3a906ee26 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 12 Oct 2020 11:38:52 -0600
Subject: [PATCH 18/44] - slight modifs of the damped exchange example

---
 .../validation_damped_exchange/llg_exchange.py | 18 +++++++++++++++++-
 .../test-spin-precession.in                    |  5 ++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
index dd1c543bb3..5b93ac5c2d 100755
--- a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
+++ b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py
@@ -6,9 +6,17 @@ import matplotlib.pyplot as plt
 import mpmath as mp
 
 hbar=0.658212           # Planck's constant (eV.fs/rad)
-J0=0.05                 # per-neighbor exchange interaction (eV)
+J0=0.05                 # per-neighbor exchange interaction (eV)
+
+# exchange interaction parameters
+J1 = 11.254 # in eV 
+J2 = 0.0    # adim
+J3 = 1.0    # in Ang.
+
+# initial spins
 S1 = np.array([1.0, 0.0, 0.0])
 S2 = np.array([0.0, 1.0, 0.0])
+
 alpha=0.01              # damping coefficient
 pi=math.pi
 
@@ -30,6 +38,14 @@ def rotation_matrix(axis, theta):
       [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
       [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])
 
+#Definition of the Bethe-Slater function
+def func_BS(x,a,b,c):
+    return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2)
+
+#Definition of the derivative (with respect to x) of the Bethe-Slater function
+def func_dBS(x,a,b,c):
+    return 8*a*(x/c**2)*(1-(x/c)**2-b*((x/c)**2)*(2-(x/c)**2))*np.exp(-(x/c)**2)
+
 # calculating precession field of spin Sr
 def calc_rot_vector(Sr,Sf):
   rot = (J0/hbar)*(Sf-alpha*np.cross(Sf,Sr))/(1.0+alpha**2)
diff --git a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
index 86da20e6f9..9dfb4a98d6 100644
--- a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
+++ b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in
@@ -21,7 +21,7 @@ pair_coeff	* * exchange 3.1 11.254 0.0 1.0
 variable        Temperature equal 0.0 
 variable        RUN equal 30000
 
-fix             1 all nve/spin lattice no
+fix             1 all nve/spin lattice frozen
 fix             2 all langevin/spin ${Temperature} 0.01 12345
 
 compute		out_mag    all spin
@@ -36,6 +36,9 @@ variable	emag      equal c_out_mag[5]
 thermo_style    custom step time v_magx v_magy v_magz v_emag pe etotal
 thermo          10
 
+compute 	outsp all property/atom spx spy spz sp fmx fmy fmz
+dump 		1 all custom 10 dump.data type x y z c_outsp[1] c_outsp[2] c_outsp[3] fx fy fz
+
 timestep	0.0001
 
 run             ${RUN}

From e86b4d3a78dbaaf454bd734491c4dcc4c4bfb449 Mon Sep 17 00:00:00 2001
From: iafoss <49990208+iafoss@users.noreply.github.com>
Date: Mon, 2 Nov 2020 11:54:11 -0500
Subject: [PATCH 19/44] bug fix in pair_mesont_tpm.cpp

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index b15a1e5b1f..9185786341 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -510,6 +510,12 @@ void PairMESONTTPM::compute(int eflag, int vflag){
 
   // set per atom values and accumulators
   // reallocate per-atom arrays if necessary
+  if (eatom_s == nullptr)
+   memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
+  if (eatom_b == nullptr)
+   memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
+  if (eatom_t == nullptr)
+   memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
   if (atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);

From e6643979516195965c7261053878f4b88d9aaa2b Mon Sep 17 00:00:00 2001
From: iafoss <49990208+iafoss@users.noreply.github.com>
Date: Mon, 2 Nov 2020 16:12:57 -0500
Subject: [PATCH 20/44] Add files via upload

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 1611 ++++++++++++++-------------
 src/USER-MESONT/pair_mesont_tpm.h   |  197 ++--
 2 files changed, 907 insertions(+), 901 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index 9185786341..a58f9892ed 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -1,803 +1,808 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://lammps.sandia.gov/, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-
-   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
-------------------------------------------------------------------------- */
-
-#include "pair_mesont_tpm.h"
-#include "export_mesont.h"
-
-
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "memory.h"
-#include "error.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-
-#include <cstring>
-#include <vector>
-#include <cmath>
-
-#include <fstream>
-#include <sstream>
-#include <algorithm>
-
-using namespace LAMMPS_NS;
-
-//since LAMMPS is compiled with C++ 2003, define a substitution for std::array
-template<typename T, int N>
-class array2003{
-public:
-  T& operator[] (int idx){ return data[idx];};
-  const T& operator[] (int idx) const{ return data[idx];};
-private:
-  T data[N];
-};
-
-
-class MESONTList {
-public:
-  MESONTList(const Atom* atom, const NeighList* nblist, double rc2);
-  ~MESONTList() {};
-  //list of segments
-  const std::vector<array2003<int,2> >& get_segments() const;
-  //list of triplets
-  const std::vector<array2003<int,3> >& get_triplets() const;
-  //list of neighbor chains [start,end] for segments
-  //(use idx() to get real indexes)
-  const std::vector<std::vector<array2003<int,2> > >& get_nbs() const;
-  //convert idx from sorted representation to real idx
-  int get_idx(int idx) const;
-  //return list of indexes for conversion from sorted representation
-  const std::vector<int>& get_idx_list() const;
-  //convert idx from real idx to sorted representation
-  int get_idxb(int idx) const;
-  //return list of indexes for conversion to sorted representation
-  const std::vector<int>& get_idxb_list() const;
-  //check if the node is the end of the tube
-  bool is_end(int idx) const;
-
-  array2003<int, 2> get_segment(int idx) const;
-  array2003<int, 3> get_triplet(int idx) const;
-
-  static const int cnt_end = -1;
-  static const int domain_end = -2;
-  static const int not_cnt = -3;
-private:
-  std::vector<array2003<int, 2> > chain_list, segments;
-  std::vector<array2003<int, 3> > triplets;
-  std::vector<std::vector<array2003<int, 2> > > nb_chains;
-  std::vector<int> index_list, index_list_b;
-};
-
-//=============================================================================
-
-inline const std::vector<std::vector<array2003<int, 2> > > &
- MESONTList::get_nbs() const {
-  return nb_chains;
-}
-
-inline int MESONTList::get_idx(int idx) const {
-  return index_list[idx];
-}
-
-inline const std::vector<int>& MESONTList::get_idx_list() const {
-  return index_list;
-};
-
-
-inline int MESONTList::get_idxb(int idx) const {
-  return index_list_b[idx];
-}
-
-inline const std::vector<int>& MESONTList::get_idxb_list() const {
-  return index_list_b;
-};
-
-inline const std::vector<array2003<int, 2> > & MESONTList::get_segments()
- const {
-  return segments;
-}
-
-inline const std::vector<array2003<int, 3> > & MESONTList::get_triplets()
- const {
-  return triplets;
-}
-
-inline array2003<int, 2> MESONTList::get_segment(int idx) const {
-  array2003<int, 2> result;
-  result[0] = chain_list[idx][0];
-  result[1] = idx;
-  return result;
-}
-
-inline array2003<int, 3> MESONTList::get_triplet(int idx) const {
-  array2003<int, 3> result;
-  result[0] = chain_list[idx][0];
-  result[1] = idx;
-  result[2] = chain_list[idx][1];
-  return result;
-}
-
-inline bool MESONTList::is_end(int idx) const {
-  return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end;
-};
-
-template<typename T>
-void vector_union(std::vector<T>& v1, std::vector<T>& v2,
- std::vector<T>& merged) {
-  std::sort(v1.begin(), v1.end());
-  std::sort(v2.begin(), v2.end());
-  merged.reserve(v1.size() + v2.size());
-  typename std::vector<T>::iterator it1 = v1.begin();
-  typename std::vector<T>::iterator it2 = v2.begin();
-
-  while (it1 != v1.end() && it2 != v2.end()) {
-    if (*it1 < *it2) {
-      if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);
-        ++it1;
-    }
-    else {
-      if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
-      ++it2;
-    }
-  }
-  while (it1 != v1.end()) {
-    if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);
-    ++it1;
-  }
-
-  while (it2 != v2.end()) {
-  if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
-    ++it2;
-  }
-}
-
-MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){
-  if (atom == nullptr || nblist == nullptr) return;
-  //number of local atoms at the node
-  int nlocal = atom->nlocal;
-  //total number of atoms in the node and ghost shell
-  int nall = nblist->inum + nblist->gnum;
-  int ntot = atom->nlocal + atom->nghost;
-  tagint* const g_id = atom->tag;
-  tagint** const bonds = atom->bond_nt;
-  tagint* const chain_id = atom->molecule;
-  int* ilist = nblist->ilist;
-
-  //convert bonds to local id representation
-  array2003<int, 2> tmp_arr;
-  tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt;
-  chain_list.resize(ntot, tmp_arr);
-  for (int ii = 0; ii < nall; ii++) {
-    int i = ilist[ii];
-    chain_list[i][0] = domain_end;
-    chain_list[i][1] = domain_end;
-  }
-  for (int ii = 0; ii < nall; ii++) {
-    int i = ilist[ii];
-    int nnb = nblist->numneigh[i];
-    for (int m = 0; m < 2; m++)
-      if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end;
-    for (int j = 0; j < nnb; j++) {
-      int nb = nblist->firstneigh[i][j];
-      if (bonds[i][0] == g_id[nb]){
-        chain_list[i][0] = nb;
-        chain_list[nb][1] = i;
-        break;
-      }
-    }
-  }
-
-  //reorder chains: index list
-  //list of indexes for conversion FROM reordered representation
-  index_list.reserve(nall);
-  index_list_b.resize(ntot, -1); // convert index TO reordered representation
-  for (int i = 0; i < ntot; i++) {
-    if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) {
-      index_list.push_back(i);
-      index_list_b[i] = index_list.size() - 1;
-      int idx = i;
-      while (1) {
-        idx = chain_list[idx][1];
-        if (idx == cnt_end || idx == domain_end) break;
-        else index_list.push_back(idx);
-        index_list_b[idx] = index_list.size() - 1;
-      }
-    }
-  }
-
-  //segment list
-  for (int i = 0; i < nlocal; i++) {
-    if (chain_list[i][0] == not_cnt) continue;
-    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     g_id[i] < g_id[chain_list[i][0]]){
-      array2003<int, 2> tmp_c;
-      tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
-      segments.push_back(tmp_c);
-    }
-    if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
-     g_id[i] < g_id[chain_list[i][1]]){
-      array2003<int, 2> tmp_c;
-       tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
-       segments.push_back(tmp_c);
-    }
-  }
-  int nbonds = segments.size();
-
-  //triplets
-  for (int i = 0; i < nlocal; i++){
-    if (chain_list[i][0] == not_cnt) continue;
-    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end)
-      triplets.push_back(get_triplet(i));
-  }
-
-  //segment neighbor list
-  nb_chains.resize(nbonds);
-  std::vector<int> nb_list_i[2], nb_list;
-  for (int i = 0; i < nbonds; i++) {
-    //union of nb lists
-    for (int m = 0; m < 2; m++) {
-      nb_list_i[m].resize(0);
-      int idx = segments[i][m];
-      if (idx >= nlocal) continue;
-      int nnb = nblist->numneigh[idx];
-      for (int j = 0; j < nnb; j++) {
-        int jdx = nblist->firstneigh[idx][j];
-        //no self interactions for nbs within the same tube
-        if (chain_id[jdx] == chain_id[idx] &&
-         std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue;
-        nb_list_i[m].push_back(index_list_b[jdx]);
-      }
-    }
-    vector_union(nb_list_i[0], nb_list_i[1], nb_list);
-
-    int nnb = nb_list.size();
-    if (nnb > 0) {
-      int idx_s = nb_list[0];
-      for (int j = 0; j < nnb; j++) {
-        //if nodes are not continuous in the sorted representation
-        //or represent chain ends, create a new neighbor chain
-        int idx_next = chain_list[index_list[nb_list[j]]][1];
-        if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) ||
-         (idx_next == cnt_end) || (idx_next == domain_end)) {
-          array2003<int, 2> chain;
-          chain[0] = idx_s;
-          chain[1] = nb_list[j];
-          //make sure that segments having at least one node
-          //in the neighbor list are included
-          int idx0 = index_list[chain[0]]; // real id of the ends
-          int idx1 = index_list[chain[1]];
-          if (chain_list[idx0][0] != cnt_end &&
-           chain_list[idx0][0] != domain_end) chain[0] -= 1;
-          if (chain_list[idx1][1] != cnt_end &&
-           chain_list[idx1][1] != domain_end) chain[1] += 1;
-          if(chain[0] != chain[1]) nb_chains[i].push_back(chain);
-          idx_s = (j == nnb - 1) ? -1 : nb_list[j + 1];
-        }
-      }
-    }
-    nb_list.resize(0);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-// the cutoff distance between walls of tubes
-static const double TPBRcutoff  = 3.0*3.4;
-int PairMESONTTPM::instance_count = 0;
-/* ---------------------------------------------------------------------- */
-
-PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
-  writedata=1;
-  BendingMode = 0;  // Harmonic bending model
-  TPMType = 0;      // Inter-tube segment-segment interaction
-  tab_path = nullptr;
-  tab_path_length = 0;
-
-  eatom_s = nullptr;
-  eatom_b = nullptr;
-  eatom_t = nullptr;
-  instance_count++;
-  if(instance_count > 1) error->all(FLERR,
-   "only a single instance of mesont/tpm pair style can be created");
-}
-
-/* ---------------------------------------------------------------------- */
-
-PairMESONTTPM::~PairMESONTTPM()
-{
-  if (allocated) {
-    memory->destroy(setflag);
-    memory->destroy(cutsq);
-    memory->destroy(cut);
-
-    memory->destroy(eatom_s);
-    memory->destroy(eatom_b);
-    memory->destroy(eatom_t);
-  }
-  instance_count--;
-  if (tab_path != nullptr) memory->destroy(tab_path);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairMESONTTPM::compute(int eflag, int vflag){
-  ev_init(eflag,vflag);
-  //total number of atoms in the node and ghost shell
-  int nall = list->inum + list->gnum;
-  int ntot = atom->nlocal + atom->nghost;
-  int newton_pair = force->newton_pair;
-  if(!newton_pair)
-   error->all(FLERR,"Pair style mesont/tpm requires newton pair on");
-
-  double **x = atom->x;
-  double **f = atom->f;
-  double *r = atom->radius;
-  double *l = atom->length;
-  int *buckling = atom->buckling;
-  tagint *g_id = atom->tag;
-
-  //check if cutoff is chosen correctly
-  double RT = mesont_lib_get_R();
-  double Lmax = 0.0;
-  for (int ii = 0; ii < list->inum; ii++) {
-    int i = list->ilist[ii];
-    if (Lmax < l[i]) Lmax = l[i];
-  }
-  double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
-   std::pow((2.0*RT + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
-    std::stringstream err;
-    err << "The selected cutoff is too small for the current system : " <<
-     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
-     ", Rcut_min = " << Rcut_min;
-    error->all(FLERR, err.str().c_str());
-  }
-
-  //generate bonds and chain nblist
-  MESONTList ntlist(atom, list, cut_global*cut_global);
-
-  //reorder data to make it contiguous within tubes
-  //and compatible with Fortran functions
-  std::vector<double> x_sort(3*nall), f_sort(3*nall), s_sort(9*nall);
-  std::vector<double> u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall);
-  std::vector<int> b_sort(nall);
-  for (int i = 0; i < nall; i++){
-    int idx = ntlist.get_idx(i);
-    for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j];
-    b_sort[i] = buckling[idx];
-  }
-
-  //bending potential
-  int n_triplets = ntlist.get_triplets().size();
-  for (int i = 0; i < n_triplets; i++) {
-    const array2003<int,3>& t = ntlist.get_triplets()[i];
-    //idx of nodes of a triplet in sorted representation
-    int idx_s0 = ntlist.get_idxb(t[0]);
-    int idx_s1 = ntlist.get_idxb(t[1]);
-    int idx_s2 = ntlist.get_idxb(t[2]);
-
-    double* X1 = &(x_sort[3*idx_s0]);
-    double* X2 = &(x_sort[3*idx_s1]);
-    double* X3 = &(x_sort[3*idx_s2]);
-    double& U1b = u_tb_sort[idx_s0];
-    double& U2b = u_tb_sort[idx_s1];
-    double& U3b = u_tb_sort[idx_s2];
-    double* F1 = &(f_sort[3*idx_s0]);
-    double* F2 = &(f_sort[3*idx_s1]);
-    double* F3 = &(f_sort[3*idx_s2]);
-    double* S1 = &(s_sort[9*idx_s0]);
-    double* S2 = &(s_sort[9*idx_s1]);
-    double* S3 = &(s_sort[9*idx_s2]);
-    double& R123 = r[t[1]];
-    double& L123 = l[t[1]];
-    int& BBF2 = b_sort[idx_s1];
-
-    mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3,
-     X1, X2, X3, R123, L123, BBF2);
-  }
-
-  //share new values of buckling
-  if (BendingMode == 1){
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      buckling[idx] = b_sort[i];
-    }
-    comm->forward_comm_pair(this);
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      b_sort[i] = buckling[idx];
-    }
-  }
-
-  //segment-segment and segment-tube interactions
-  int n_segments = ntlist.get_segments().size();
-  double Rmax = 0.0;
-  Lmax = 0.0;
-  for (int i = 0; i < n_segments; i++) {
-    const array2003<int,2>& s = ntlist.get_segments()[i];
-    //idx of a segment end 1 in sorted representation
-    int idx_s0 = ntlist.get_idxb(s[0]);
-    //idx of a segment end 2 in sorted representation
-    int idx_s1 = ntlist.get_idxb(s[1]);
-    double* X1 = &(x_sort[3*idx_s0]);
-    double* X2 = &(x_sort[3*idx_s1]);
-    double length = std::sqrt(std::pow(X1[0]-X2[0],2) +
-     std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2));
-    if (length > Lmax) Lmax = length;
-    double& U1t = u_tt_sort[idx_s0];
-    double& U2t = u_tt_sort[idx_s1];
-    double& U1s = u_ts_sort[idx_s0];
-    double& U2s = u_ts_sort[idx_s1];
-    double* F1 = &(f_sort[3*idx_s0]);
-    double* F2 = &(f_sort[3*idx_s1]);
-    double* S1 = &(s_sort[9*idx_s0]);
-    double* S2 = &(s_sort[9*idx_s1]);
-    double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12;
-    if (std::abs(R12 - RT) > 1e-3)
-        error->all(FLERR,"Inconsistent input and potential table");
-    //assume that the length of the segment is defined by the node with
-    //smallest global id
-    double L12 = (g_id[s[0]] > g_id[s[1]]) ? l[s[1]] : l[s[0]];
-    mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2,
-     R12, L12);
-
-    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){
-      //id of the beginning and end of the chain in the sorted representation
-      const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
-      int N = chain[1] - chain[0] + 1;  //number of elements in the chain
-      int end1 = ntlist.get_idx(chain[0]);  //chain ends (real representation)
-      int end2 = ntlist.get_idx(chain[1]);
-      double* X = &(x_sort[3*chain[0]]);
-      double* Ut = &(u_tt_sort[chain[0]]);
-      double* F = &(f_sort[3*chain[0]]);
-      double* S = &(s_sort[9*chain[0]]);
-      double R = r[end1];
-      int* BBF = &(b_sort[chain[0]]);
-      int E1 = ntlist.is_end(end1);
-      int E2 = ntlist.is_end(end2);
-
-      int Ee = 0;
-      double* Xe = X; double* Fe = F; double* Se = S;
-      if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end &&
-       ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] ==
-       MESONTList::cnt_end){
-        Ee = 1;
-        int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]);
-        Xe = &(x_sort[3*idx]);
-        Fe = &(f_sort[3*idx]);
-        Se = &(s_sort[9*idx]);
-      }
-      else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end &&
-       ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] ==
-       MESONTList::cnt_end){
-        Ee = 2;
-        int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]);
-        Xe = &(x_sort[3*idx]);
-        Fe = &(f_sort[3*idx]);
-        Se = &(s_sort[9*idx]);
-      }
-
-      mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S,
-       Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType);
-    }
-  }
-
-  //check if cutoff is chosen correctly
-  Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
-   std::pow((2.0*Rmax + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
-    std::stringstream err;
-    err << "The selected cutoff is too small for the current system : " <<
-     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
-     ", Rcut_min = " << Rcut_min;
-    error->all(FLERR, err.str().c_str());
-  }
-
-  // set per atom values and accumulators
-  // reallocate per-atom arrays if necessary
-  if (eatom_s == nullptr)
-   memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
-  if (eatom_b == nullptr)
-   memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
-  if (eatom_t == nullptr)
-   memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
-  if (atom->nmax > maxeatom) {
-    maxeatom = atom->nmax;
-    memory->destroy(eatom);
-    memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom");
-    memory->destroy(eatom_s);
-    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
-    memory->destroy(eatom_b);
-    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
-    memory->destroy(eatom_t);
-    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
-  }
-
-  if (atom->nmax > maxvatom) {
-    maxvatom = atom->nmax;
-    memory->destroy(vatom);
-    memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom");
-  }
-
-  // zero accumulators
-  eng_vdwl = 0.0; energy_s = 0.0;
-  energy_b = 0.0; energy_t = 0.0;
-  for (int i = 0; i < 6; i++) virial[i] = 0.0;
-  for (int i = 0; i < ntot; i++){
-    eatom[i] = 0.0; eatom_s[i] = 0.0;
-    eatom_b[i] = 0.0; eatom_t[i] = 0.0;
-  }
-  for (int i = 0; i < ntot; i++)
-    for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
-
-  //convert from sorted representation
-  for (int i = 0; i < nall; i++){
-    int idx = ntlist.get_idx(i);
-    for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
-    eatom_s[idx] = u_ts_sort[i];
-    eatom_b[idx] = u_tb_sort[i];
-    eatom_t[idx] = u_tt_sort[i];
-    eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
-    energy_s += u_ts_sort[i];
-    energy_b += u_tb_sort[i];
-    energy_t += u_tt_sort[i];
-    vatom[idx][0] = s_sort[9*i+0]; //xx
-    vatom[idx][1] = s_sort[9*i+4]; //yy
-    vatom[idx][2] = s_sort[9*i+8]; //zz
-    vatom[idx][3] = s_sort[9*i+1]; //xy
-    vatom[idx][4] = s_sort[9*i+2]; //xz
-    vatom[idx][5] = s_sort[9*i+5]; //yz
-    for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j];
-    buckling[idx] = b_sort[i];
-  }
-  eng_vdwl = energy_s + energy_b + energy_t;
-}
-
-/* ----------------------------------------------------------------------
-   allocate all arrays
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::allocate(){
-  allocated = 1;
-  int n = atom->ntypes;
-
-  memory->create(setflag,n+1,n+1,"pair:setflag");
-  for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++)
-      setflag[i][j] = 0;
-
-  memory->create(cutsq,n+1,n+1,"pair:cutsq");
-  memory->create(cut,n+1,n+1,"pair:cut");
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::settings(int narg, char **arg){
-  if ((narg == 0) || (narg > 4))
-    error->all(FLERR,"Illegal pair_style command");
-  cut_global = utils::numeric(FLERR,arg[0],false,lmp);
-
-  // reset cutoffs that have been explicitly set
-  if (allocated) {
-    int i,j;
-    for (i = 1; i <= atom->ntypes; i++)
-      for (j = i+1; j <= atom->ntypes; j++)
-        cut[i][j] = cut_global;
-  }
-  std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs";
-  tab_path_length = TPMAFile.length();
-  if (tab_path != nullptr) memory->destroy(tab_path);
-  //c_str returns '\0' terminated string
-  memory->create(tab_path,tab_path_length+1,"pair:path");
-  std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1);
-  mesont_lib_SetTablePath(tab_path, tab_path_length);
-
-  if (narg > 2) {
-    BendingMode = utils::numeric(FLERR,arg[2],false,lmp);
-    if ((BendingMode < 0) || (BendingMode > 1))
-      error->all(FLERR,"Incorrect BendingMode");
-  }
-  if (narg > 3) {
-    TPMType = utils::numeric(FLERR,arg[3],false,lmp);
-    if ((TPMType < 0) || (TPMType > 1))
-      error->all(FLERR,"Incorrect TPMType");
-  }
-
-  mesont_lib_TPBInit();
-  int M, N;
-  std::ifstream in(TPMAFile);
-  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
-  std::string tmp;
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  in >> M >> N;
-  in.close();
-  mesont_lib_TPMInit(M, N);
-  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more type pairs
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::coeff(int narg, char **arg){
-  if ((narg < 2) || (narg > 3))
-    error->all(FLERR,"Incorrect args for pair coefficients");
-
-  if (!allocated) allocate();
-
-  int ilo,ihi,jlo,jhi;
-  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
-  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
-
-  double cut_one = cut_global;
-  if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp);
-
-  int count = 0;
-  for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo,i); j <= jhi; j++) {
-      cut[i][j] = cut_one;
-      setflag[i][j] = 1;
-      count++;
-    }
-  }
-
-  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
-}
-
-/* ----------------------------------------------------------------------
-   init for one type pair i,j and corresponding j,i
-------------------------------------------------------------------------- */
-
-double PairMESONTTPM::init_one(int i, int j){
-  if (setflag[i][j] == 0) {
-    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
-  }
-
-  return cut[i][j];
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_restart(FILE *fp){
-  write_restart_settings(fp);
-
-  int i,j;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      fwrite(&setflag[i][j],sizeof(int),1,fp);
-      if (setflag[i][j]) {
-        fwrite(&cut[i][j],sizeof(double),1,fp);
-      }
-    }
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::read_restart(FILE *fp){
-  read_restart_settings(fp);
-  allocate();
-
-  int i,j;
-  int me = comm->me;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
-      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
-      if (setflag[i][j]) {
-        if (me == 0) {
-          fread(&cut[i][j],sizeof(double),1,fp);
-        }
-        MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
-      }
-    }
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_restart_settings(FILE *fp){
-  fwrite(&BendingMode,sizeof(int),1,fp);
-  fwrite(&TPMType,sizeof(int),1,fp);
-  fwrite(&cut_global,sizeof(double),1,fp);
-  fwrite(&tab_path_length,sizeof(int),1,fp);
-  fwrite(tab_path,tab_path_length+1,1,fp);
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::read_restart_settings(FILE *fp){
-  int me = comm->me;
-  if (me == 0) {
-    fread(&BendingMode,sizeof(int),1,fp);
-    fread(&TPMType,sizeof(int),1,fp);
-    fread(&cut_global,sizeof(double),1,fp);
-    fread(&tab_path_length,sizeof(int),1,fp);
-  }
-  MPI_Bcast(&BendingMode,1,MPI_INT,0,world);
-  MPI_Bcast(&TPMType,1,MPI_INT,0,world);
-  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
-  MPI_Bcast(&tab_path_length,1,MPI_INT,0,world);
-
-  if (tab_path != nullptr) memory->destroy(tab_path);
-  memory->create(tab_path,tab_path_length+1,"pair:path");
-  if (me == 0) fread(tab_path,tab_path_length+1,1,fp);
-  MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world);
-  mesont_lib_SetTablePath(tab_path,tab_path_length);
-  mesont_lib_TPBInit();
-  int M, N;
-  std::ifstream in(tab_path);
-  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
-  std::string tmp;
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  in >> M >> N;
-  in.close();
-  mesont_lib_TPMInit(M, N);
-  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to data file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_data(FILE *fp){
-  for (int i = 1; i <= atom->ntypes; i++)
-    fprintf(fp,"%d\n",i);
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes all pairs to data file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_data_all(FILE *fp){
-  for (int i = 1; i <= atom->ntypes; i++)
-    for (int j = i; j <= atom->ntypes; j++)
-      fprintf(fp,"%d %d %g\n",i,j,cut[i][j]);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairMESONTTPM::init_style(){
-  //make sure that a full list is created (including ghost nodes)
-  int r = neighbor->request(this,instance_me);
-  neighbor->requests[r]->half = false;
-  neighbor->requests[r]->full = true;
-  neighbor->requests[r]->ghost = true;
-}
-
-void* PairMESONTTPM::extract(const char *str, int &){
-  if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s;
-  else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b;
-  else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t;
-  else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s;
-  else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b;
-  else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t;
-  else return nullptr;
-};
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
+------------------------------------------------------------------------- */
+
+#include "pair_mesont_tpm.h"
+#include "export_mesont.h"
+
+
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+
+#include <cstring>
+#include <vector>
+#include <cmath>
+
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+
+using namespace LAMMPS_NS;
+
+// fixed-size array wrapper used in place of std::array (C++ 2003 compatible)
+template<typename T, int N>
+class array2003 {
+ public:
+  T& operator[](int pos) { return data[pos]; }  // unchecked mutable access
+  const T& operator[](int pos) const { return data[pos]; }  // unchecked read
+ private:
+  T data[N];  // storage; the class declares no constructor to fill it
+};
+
+
+class MESONTList {
+ public:
+  MESONTList(const Atom* atom, const NeighList* nblist, double rc2);
+  ~MESONTList() {}
+  // all segments (node pairs) collected by the constructor
+  const std::vector<array2003<int,2> >& get_segments() const;
+  // all triplets (node together with both chain links) from the constructor
+  const std::vector<array2003<int,3> >& get_triplets() const;
+  // per segment: neighbor chains as [start,end] in sorted indexes
+  // (translate with get_idx() to obtain real indexes)
+  const std::vector<std::vector<array2003<int,2> > >& get_nbs() const;
+  // sorted index -> real index
+  int get_idx(int idx) const;
+  // whole sorted -> real conversion table
+  const std::vector<int>& get_idx_list() const;
+  // real index -> sorted index
+  int get_idxb(int idx) const;
+  // whole real -> sorted conversion table
+  const std::vector<int>& get_idxb_list() const;
+  // true if either chain link of the node equals cnt_end
+  bool is_end(int idx) const;
+
+  array2003<int, 2> get_segment(int idx) const;
+  array2003<int, 3> get_triplet(int idx) const;
+
+  static const int cnt_end = -1;
+  static const int domain_end = -2;
+  static const int not_cnt = -3;
+ private:
+  std::vector<array2003<int, 2> > chain_list, segments;
+  std::vector<array2003<int, 3> > triplets;
+  std::vector<std::vector<array2003<int, 2> > > nb_chains;
+  std::vector<int> index_list, index_list_b;
+};
+
+//=============================================================================
+
+inline const std::vector<std::vector<array2003<int, 2> > > &
+ MESONTList::get_nbs() const {
+  return this->nb_chains;  // one list of neighbor chains per segment
+}
+
+inline int MESONTList::get_idx(int idx) const {
+  return this->index_list[idx];  // sorted -> real, no bounds check
+}
+
+inline const std::vector<int>& MESONTList::get_idx_list() const {
+  return this->index_list;  // full sorted -> real table
+}
+
+
+inline int MESONTList::get_idxb(int idx) const {
+  return this->index_list_b[idx];  // real -> sorted, no bounds check
+}
+
+inline const std::vector<int>& MESONTList::get_idxb_list() const {
+  return this->index_list_b;  // full real -> sorted table
+}
+
+inline const std::vector<array2003<int, 2> > & MESONTList::get_segments()
+ const {
+  return this->segments;  // segment list filled by the constructor
+}
+
+inline const std::vector<array2003<int, 3> > & MESONTList::get_triplets()
+ const {
+  return this->triplets;  // triplet list filled by the constructor
+}
+
+inline array2003<int, 2> MESONTList::get_segment(int idx) const {
+  array2003<int, 2> seg;        // (chain link [0] of the node, node itself)
+  seg[1] = idx;
+  seg[0] = chain_list[idx][0];
+  return seg;
+}
+
+inline array2003<int, 3> MESONTList::get_triplet(int idx) const {
+  const array2003<int, 2>& links = chain_list[idx];  // stored chain links
+  array2003<int, 3> tri;
+  tri[0] = links[0]; tri[2] = links[1];
+  tri[1] = idx;                  // the node itself sits in the middle
+  return tri;
+}
+
+inline bool MESONTList::is_end(int idx) const {
+  return !(chain_list[idx][0] != cnt_end && chain_list[idx][1] != cnt_end);
+}
+
+template<typename T>
+void vector_union(std::vector<T>& v1, std::vector<T>& v2,
+ std::vector<T>& merged) {
+  // Append the sorted, duplicate-free union of v1 and v2 to merged.
+  //   v1, v2 : inputs; both are sorted in place as a side effect
+  //   merged : output; it is not cleared here, and a value is appended
+  //            only if it compares greater than the current last element
+  // Only operator< of T is used.
+
+  std::sort(v1.begin(), v1.end());
+  std::sort(v2.begin(), v2.end());
+  merged.reserve(v1.size() + v2.size());
+
+  typedef typename std::vector<T>::iterator iter;
+  iter a = v1.begin();
+  iter b = v2.begin();
+  const iter a_end = v1.end();
+  const iter b_end = v2.end();
+
+  while (a != a_end || b != b_end) {
+    // take from v1 only while its head is strictly smaller than the head
+    // of v2 (or v2 is exhausted); ties are therefore consumed from v2
+    iter& src = (b == b_end || (a != a_end && *a < *b)) ? a : b;
+
+    // compare against the last stored value to skip duplicates
+    if (merged.empty() || merged.back() < *src) merged.push_back(*src);
+    ++src;
+  }
+}
+
+MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){
+  if (atom == nullptr || nblist == nullptr) return;  // all lists stay empty
+  //number of local atoms at the node
+  int nlocal = atom->nlocal;
+  //number of entries of ilist that are processed (inum + gnum)
+  int nall = nblist->inum + nblist->gnum;
+  int ntot = atom->nlocal + atom->nghost;  // size of the per-atom tables
+  tagint* const g_id = atom->tag;
+  tagint** const bonds = atom->bond_nt;
+  tagint* const chain_id = atom->molecule;
+  int* ilist = nblist->ilist;
+
+  //convert bonds (stored as global tags) to local id representation;
+  //entries never reached through ilist keep the value not_cnt
+  array2003<int, 2> tmp_arr;
+  tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt;
+  chain_list.resize(ntot, tmp_arr);
+  for (int ii = 0; ii < nall; ii++) {
+    int i = ilist[ii];
+    chain_list[i][0] = domain_end;  // default until a partner is found below
+    chain_list[i][1] = domain_end;
+  }
+  for (int ii = 0; ii < nall; ii++) {
+    int i = ilist[ii];
+    int nnb = nblist->numneigh[i];
+    for (int m = 0; m < 2; m++)
+      if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end;
+    for (int j = 0; j < nnb; j++) {
+      int nb = nblist->firstneigh[i][j];
+      if (bonds[i][0] == g_id[nb]){  // neighbor carrying the tag of bond 0
+        chain_list[i][0] = nb;
+        chain_list[nb][1] = i;       // the reverse link is set on the partner
+        break;
+      }
+    }
+  }
+
+  //reorder chains: start at every node whose link [0] is an end marker and
+  //follow link [1]; index_list converts FROM the reordered representation
+  index_list.reserve(nall);
+  index_list_b.resize(ntot, -1); // convert index TO reordered representation
+  for (int i = 0; i < ntot; i++) {
+    if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) {
+      index_list.push_back(i);
+      index_list_b[i] = index_list.size() - 1;
+      int idx = i;
+      while (1) {
+        idx = chain_list[idx][1];
+        if (idx == cnt_end || idx == domain_end) break;
+        else index_list.push_back(idx);
+        index_list_b[idx] = index_list.size() - 1;  // reached only if pushed
+      }
+    }
+  }
+
+  //segment list: a local node adds a link only if its tag is the smaller one
+  for (int i = 0; i < nlocal; i++) {
+    if (chain_list[i][0] == not_cnt) continue;
+    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
+     g_id[i] < g_id[chain_list[i][0]]){
+      array2003<int, 2> tmp_c;
+      tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
+      segments.push_back(tmp_c);
+    }
+    if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
+     g_id[i] < g_id[chain_list[i][1]]){
+      array2003<int, 2> tmp_c;
+       tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
+       segments.push_back(tmp_c);
+    }
+  }
+  int nbonds = segments.size();
+
+  //triplets: local nodes whose two links both point to real nodes
+  for (int i = 0; i < nlocal; i++){
+    if (chain_list[i][0] == not_cnt) continue;
+    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
+     chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end)
+      triplets.push_back(get_triplet(i));
+  }
+
+  //segment neighbor list (one list of [start,end] chains per segment)
+  nb_chains.resize(nbonds);
+  std::vector<int> nb_list_i[2], nb_list;
+  for (int i = 0; i < nbonds; i++) {
+    //union of the nb lists of both segment nodes, in sorted indexes
+    for (int m = 0; m < 2; m++) {
+      nb_list_i[m].resize(0);
+      int idx = segments[i][m];
+      if (idx >= nlocal) continue;  // only local nodes contribute neighbors
+      int nnb = nblist->numneigh[idx];
+      for (int j = 0; j < nnb; j++) {
+        int jdx = nblist->firstneigh[idx][j];
+        //skip nbs with the same molecule id that are at most 5 positions
+        //away in the sorted order (no self interactions within a tube)
+        if (chain_id[jdx] == chain_id[idx] &&
+         std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue;
+        nb_list_i[m].push_back(index_list_b[jdx]);
+      }
+    }
+    vector_union(nb_list_i[0], nb_list_i[1], nb_list);
+
+    int nnb = nb_list.size();
+    if (nnb > 0) {
+      int idx_s = nb_list[0];  // sorted index where the current run starts
+      for (int j = 0; j < nnb; j++) {
+        //if nodes are not continuous in the sorted representation
+        //or represent chain ends, create a new neighbor chain
+        int idx_next = chain_list[index_list[nb_list[j]]][1];
+        if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) ||
+         (idx_next == cnt_end) || (idx_next == domain_end)) {
+          array2003<int, 2> chain;
+          chain[0] = idx_s;
+          chain[1] = nb_list[j];
+          //widen the run by one node on each side that has a real link, so
+          //that segments with at least one node in the nb list are included
+          int idx0 = index_list[chain[0]]; // real id of the ends
+          int idx1 = index_list[chain[1]];
+          if (chain_list[idx0][0] != cnt_end &&
+           chain_list[idx0][0] != domain_end) chain[0] -= 1;
+          if (chain_list[idx1][1] != cnt_end &&
+           chain_list[idx1][1] != domain_end) chain[1] += 1;
+          if(chain[0] != chain[1]) nb_chains[i].push_back(chain);  // >1 node
+          idx_s = (j == nnb - 1) ? -1 : nb_list[j + 1];
+        }
+      }
+    }
+    nb_list.resize(0);  // vector_union appends, so empty it for the next pass
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// the cutoff distance between walls of tubes (enters Rcut_min in compute())
+static const double TPBRcutoff  = 3.0*3.4;
+int PairMESONTTPM::instance_count = 0;  // ++ in constructor, -- in destructor
+/* ---------------------------------------------------------------------- */
+
+PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
+  // per-atom energy buffers are created later, in compute()
+  eatom_s = eatom_b = eatom_t = nullptr;
+  nmax = 0;
+  // no table file known yet
+  tab_path_length = 0;
+  tab_path = nullptr;
+
+  TPMType = 0;      // Inter-tube segment-segment interaction
+  BendingMode = 0;  // Harmonic bending model
+  writedata = 1;
+
+  if (++instance_count > 1) error->all(FLERR,
+   "only a single instance of mesont/tpm pair style can be created");
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairMESONTTPM::~PairMESONTTPM()
+{
+  instance_count--;  // this instance no longer counts toward the limit
+  if (tab_path != nullptr) memory->destroy(tab_path);
+
+  if (!allocated) return;
+
+  memory->destroy(eatom_t);
+  memory->destroy(eatom_b);
+  memory->destroy(eatom_s);
+  memory->destroy(cut);
+  memory->destroy(cutsq);
+  memory->destroy(setflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMESONTTPM::compute(int eflag, int vflag){
+  // evaluate bending, stretching and segment-tube terms through the mesont
+  // library and copy forces, energies and virial back to the LAMMPS arrays
+  ev_init(eflag,vflag);
+  if (atom->nmax > nmax) {  // reallocate per-atom arrays if necessary
+    // release every old buffer first so that growing does not leak it
+    memory->destroy(eatom_s);
+    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
+    memory->destroy(eatom_b);
+    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
+    memory->destroy(eatom_t);
+    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
+    nmax = atom->nmax;
+  }
+  //total number of atoms in the node and ghost shell
+  int nall = list->inum + list->gnum;
+  int ntot = atom->nlocal + atom->nghost;
+  int newton_pair = force->newton_pair;
+  if(!newton_pair)
+   error->all(FLERR,"Pair style mesont/tpm requires newton pair on");
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *r = atom->radius;
+  double *l = atom->length;
+  int *buckling = atom->buckling;
+  tagint *g_id = atom->tag;
+
+  //check if cutoff is chosen correctly (table radius, largest stored length)
+  double RT = mesont_lib_get_R();
+  double Lmax = 0.0;
+  for (int ii = 0; ii < list->inum; ii++) {
+    int i = list->ilist[ii];
+    if (Lmax < l[i]) Lmax = l[i];
+  }
+  double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
+   std::pow((2.0*RT + TPBRcutoff),2)));
+  if (cut_global < Rcut_min){
+    std::stringstream err;
+    err << "The selected cutoff is too small for the current system : " <<
+     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
+     ", Rcut_min = " << Rcut_min;
+    error->all(FLERR, err.str().c_str());
+  }
+
+  //generate bonds and chain nblist
+  MESONTList ntlist(atom, list, cut_global*cut_global);
+
+  //reorder data to make it contiguous within tubes
+  //and compatible with Fortran functions
+  std::vector<double> x_sort(3*nall), f_sort(3*nall), s_sort(9*nall);
+  std::vector<double> u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall);
+  std::vector<int> b_sort(nall);
+  for (int i = 0; i < nall; i++){
+    int idx = ntlist.get_idx(i);
+    for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j];
+    b_sort[i] = buckling[idx];
+  }
+
+  //bending potential
+  int n_triplets = ntlist.get_triplets().size();
+  for (int i = 0; i < n_triplets; i++) {
+    const array2003<int,3>& t = ntlist.get_triplets()[i];
+    //idx of nodes of a triplet in sorted representation
+    int idx_s0 = ntlist.get_idxb(t[0]);
+    int idx_s1 = ntlist.get_idxb(t[1]);
+    int idx_s2 = ntlist.get_idxb(t[2]);
+
+    double* X1 = &(x_sort[3*idx_s0]);
+    double* X2 = &(x_sort[3*idx_s1]);
+    double* X3 = &(x_sort[3*idx_s2]);
+    double& U1b = u_tb_sort[idx_s0];
+    double& U2b = u_tb_sort[idx_s1];
+    double& U3b = u_tb_sort[idx_s2];
+    double* F1 = &(f_sort[3*idx_s0]);
+    double* F2 = &(f_sort[3*idx_s1]);
+    double* F3 = &(f_sort[3*idx_s2]);
+    double* S1 = &(s_sort[9*idx_s0]);
+    double* S2 = &(s_sort[9*idx_s1]);
+    double* S3 = &(s_sort[9*idx_s2]);
+    double& R123 = r[t[1]];
+    double& L123 = l[t[1]];
+    int& BBF2 = b_sort[idx_s1];
+
+    mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3,
+     X1, X2, X3, R123, L123, BBF2);
+  }
+
+  //share new values of buckling
+  if (BendingMode == 1){
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      buckling[idx] = b_sort[i];
+    }
+    comm->forward_comm_pair(this);
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      b_sort[i] = buckling[idx];
+    }
+  }
+
+  //segment-segment and segment-tube interactions
+  int n_segments = ntlist.get_segments().size();
+  double Rmax = 0.0;
+  Lmax = 0.0;  // from here on: largest actual node-to-node distance
+  for (int i = 0; i < n_segments; i++) {
+    const array2003<int,2>& s = ntlist.get_segments()[i];
+    //idx of a segment end 1 in sorted representation
+    int idx_s0 = ntlist.get_idxb(s[0]);
+    //idx of a segment end 2 in sorted representation
+    int idx_s1 = ntlist.get_idxb(s[1]);
+    double* X1 = &(x_sort[3*idx_s0]);
+    double* X2 = &(x_sort[3*idx_s1]);
+    double length = std::sqrt(std::pow(X1[0]-X2[0],2) +
+     std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2));
+    if (length > Lmax) Lmax = length;
+    double& U1t = u_tt_sort[idx_s0];
+    double& U2t = u_tt_sort[idx_s1];
+    double& U1s = u_ts_sort[idx_s0];
+    double& U2s = u_ts_sort[idx_s1];
+    double* F1 = &(f_sort[3*idx_s0]);
+    double* F2 = &(f_sort[3*idx_s1]);
+    double* S1 = &(s_sort[9*idx_s0]);
+    double* S2 = &(s_sort[9*idx_s1]);
+    double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12;
+    if (std::abs(R12 - RT) > 1e-3)
+        error->all(FLERR,"Inconsistent input and potential table");
+    //assume that the length of the segment is defined by the node with
+    //smallest global id
+    double L12 = (g_id[s[0]] > g_id[s[1]]) ? l[s[1]] : l[s[0]];
+    mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2,
+     R12, L12);
+
+    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){
+      //id of the beginning and end of the chain in the sorted representation
+      const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
+      int N = chain[1] - chain[0] + 1;  //number of elements in the chain
+      int end1 = ntlist.get_idx(chain[0]);  //chain ends (real representation)
+      int end2 = ntlist.get_idx(chain[1]);
+      double* X = &(x_sort[3*chain[0]]);
+      double* Ut = &(u_tt_sort[chain[0]]);
+      double* F = &(f_sort[3*chain[0]]);
+      double* S = &(s_sort[9*chain[0]]);
+      double R = r[end1];
+      int* BBF = &(b_sort[chain[0]]);
+      int E1 = ntlist.is_end(end1);
+      int E2 = ntlist.is_end(end2);
+
+      int Ee = 0;
+      double* Xe = X; double* Fe = F; double* Se = S;
+      if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end &&
+       ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] ==
+       MESONTList::cnt_end){
+        Ee = 1;
+        int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]);
+        Xe = &(x_sort[3*idx]);
+        Fe = &(f_sort[3*idx]);
+        Se = &(s_sort[9*idx]);
+      }
+      else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end &&
+       ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] ==
+       MESONTList::cnt_end){
+        Ee = 2;
+        int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]);
+        Xe = &(x_sort[3*idx]);
+        Fe = &(f_sort[3*idx]);
+        Se = &(s_sort[9*idx]);
+      }
+
+      mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S,
+       Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType);
+    }
+  }
+
+  //check the cutoff again with the measured Lmax and the largest radius Rmax
+  Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
+   std::pow((2.0*Rmax + TPBRcutoff),2)));
+  if (cut_global < Rcut_min){
+    std::stringstream err;
+    err << "The selected cutoff is too small for the current system : " <<
+     "L_max = " << Lmax << ", R_max = " << Rmax << ", Rc = " << cut_global <<
+     ", Rcut_min = " << Rcut_min;
+    error->all(FLERR, err.str().c_str());
+  }
+
+  //convert from sorted representation
+  for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
+      buckling[idx] = b_sort[i];
+  }
+  if(eflag){
+    eng_vdwl = 0.0; energy_s = 0.0;
+    energy_b = 0.0; energy_t = 0.0;
+    for (int i = 0; i < ntot; i++){
+      eatom[i] = 0.0; eatom_s[i] = 0.0;
+      eatom_b[i] = 0.0; eatom_t[i] = 0.0;
+    }
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      eatom_s[idx] = u_ts_sort[i];
+      eatom_b[idx] = u_tb_sort[i];
+      eatom_t[idx] = u_tt_sort[i];
+      eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
+      energy_s += u_ts_sort[i];
+      energy_b += u_tb_sort[i];
+      energy_t += u_tt_sort[i];
+    }
+    eng_vdwl = energy_s + energy_b + energy_t;
+  }
+  if(vflag){
+    for (int i = 0; i < 6; i++) virial[i] = 0.0;
+    for (int i = 0; i < nall; i++){  // global sum: no index conversion needed
+      virial[0] += s_sort[9*i+0]; //xx
+      virial[1] += s_sort[9*i+4]; //yy
+      virial[2] += s_sort[9*i+8]; //zz
+      virial[3] += s_sort[9*i+1]; //xy
+      virial[4] += s_sort[9*i+2]; //xz
+      virial[5] += s_sort[9*i+5]; //yz
+    }
+  }
+  int vflag_atom = vflag & 4;
+  if(vflag_atom){
+    for (int i = 0; i < ntot; i++)
+      for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      vatom[idx][0] = s_sort[9*i+0]; //xx
+      vatom[idx][1] = s_sort[9*i+4]; //yy
+      vatom[idx][2] = s_sort[9*i+8]; //zz
+      vatom[idx][3] = s_sort[9*i+1]; //xy
+      vatom[idx][4] = s_sort[9*i+2]; //xz
+      vatom[idx][5] = s_sort[9*i+5]; //yz
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::allocate(){
+  const int ntypes = atom->ntypes;
+  allocated = 1;
+
+  // only the upper triangle (i <= j) of setflag is cleared here
+  memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
+  for (int row = 1; row <= ntypes; ++row)
+    for (int col = row; col <= ntypes; ++col) setflag[row][col] = 0;
+
+  memory->create(cutsq,ntypes+1,ntypes+1,"pair:cutsq");
+  memory->create(cut,ntypes+1,ntypes+1,"pair:cut");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::settings(int narg, char **arg){
+  // args: cutoff [table file [BendingMode [TPMType]]]
+  if ((narg == 0) || (narg > 4))
+    error->all(FLERR,"Illegal pair_style command");
+  cut_global = utils::numeric(FLERR,arg[0],false,lmp);
+
+  // reset the cutoff of every i < j type pair to the new global value
+  if (allocated) {
+    for (int i = 1; i <= atom->ntypes; i++)
+      for (int j = i+1; j <= atom->ntypes; j++)
+        cut[i][j] = cut_global;
+  }
+  std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs";
+  tab_path_length = TPMAFile.length();
+  if (tab_path != nullptr) memory->destroy(tab_path);
+  //c_str returns '\0' terminated string
+  memory->create(tab_path,tab_path_length+1,"pair:path");
+  std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1);
+  mesont_lib_SetTablePath(tab_path, tab_path_length);
+
+  if (narg > 2) {
+    BendingMode = utils::numeric(FLERR,arg[2],false,lmp);
+    if ((BendingMode < 0) || (BendingMode > 1))
+      error->all(FLERR,"Incorrect BendingMode");
+  }
+  if (narg > 3) {
+    TPMType = utils::numeric(FLERR,arg[3],false,lmp);
+    if ((TPMType < 0) || (TPMType > 1))
+      error->all(FLERR,"Incorrect TPMType");
+  }
+
+  mesont_lib_TPBInit();
+  // the table dimensions M and N follow three header lines of the file
+  int M = 0, N = 0;
+  std::ifstream in(TPMAFile);
+  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
+  std::string tmp;
+  for (int skip = 0; skip < 3; skip++) std::getline(in,tmp);
+  // stop here if the dimensions cannot be parsed instead of passing garbage on
+  if (!(in >> M >> N)) error->all(FLERR,"Incorrect table file format");
+  in.close();
+  mesont_lib_TPMInit(M, N);
+  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::coeff(int narg, char **arg){
+  // expected: itype jtype [cutoff]
+  if (!((narg == 2) || (narg == 3)))
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int type_lo,type_hi,other_lo,other_hi;
+  utils::bounds(FLERR,arg[0],1,atom->ntypes,type_lo,type_hi,error);
+  utils::bounds(FLERR,arg[1],1,atom->ntypes,other_lo,other_hi,error);
+
+  // an optional third argument overrides the global cutoff
+  const double pair_cut =
+   (narg == 3) ? utils::numeric(FLERR,arg[2],false,lmp) : cut_global;
+
+  // only pairs with j >= i are stored
+  int nset = 0;
+  for (int i = type_lo; i <= type_hi; i++)
+    for (int j = MAX(other_lo,i); j <= other_hi; j++, nset++) {
+      setflag[i][j] = 1;
+      cut[i][j] = pair_cut;
+    }
+
+  if (nset == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairMESONTTPM::init_one(int i, int j){
+  // pairs never set explicitly get a cutoff mixed from the two like-pairs
+  if (!setflag[i][j])
+    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
+  const double rc = cut[i][j];
+  return rc;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_restart(FILE *fp){
+  write_restart_settings(fp);
+
+  // for every i <= j pair: the set flag, then the cutoff if the flag is set
+  const int ntypes = atom->ntypes;
+  for (int i = 1; i <= ntypes; i++)
+    for (int j = i; j <= ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (!setflag[i][j]) continue;
+      fwrite(&cut[i][j],sizeof(double),1,fp);
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::read_restart(FILE *fp){
+  read_restart_settings(fp);
+  allocate();
+
+  // proc 0 reads each record and broadcasts it to all other procs
+  const bool reader = (comm->me == 0);
+  const int ntypes = atom->ntypes;
+  for (int i = 1; i <= ntypes; i++) {
+    for (int j = i; j <= ntypes; j++) {
+      if (reader) fread(&setflag[i][j],sizeof(int),1,fp);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (!setflag[i][j]) continue;
+      // a cutoff follows only for pairs whose flag is set
+      if (reader) fread(&cut[i][j],sizeof(double),1,fp);
+      MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_restart_settings(FILE *fp){
+  fwrite(&BendingMode,sizeof(int),1,fp);       // record starts with the modes
+  fwrite(&TPMType,sizeof(int),1,fp);
+  fwrite(&cut_global,sizeof(double),1,fp);
+  fwrite(&tab_path_length,sizeof(int),1,fp);   // length without the '\0'
+  fwrite(tab_path,sizeof(char),tab_path_length+1,fp);  // path plus its '\0'
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::read_restart_settings(FILE *fp){
+  // proc 0 reads the settings record; each value is broadcast to all procs
+  const int me = comm->me;
+  if (me == 0) {
+    fread(&BendingMode,sizeof(int),1,fp);
+    fread(&TPMType,sizeof(int),1,fp);
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&tab_path_length,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&BendingMode,1,MPI_INT,0,world);
+  MPI_Bcast(&TPMType,1,MPI_INT,0,world);
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&tab_path_length,1,MPI_INT,0,world);
+
+  if (tab_path != nullptr) memory->destroy(tab_path);
+  memory->create(tab_path,tab_path_length+1,"pair:path");
+  if (me == 0) fread(tab_path,tab_path_length+1,1,fp);
+  MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world);
+  mesont_lib_SetTablePath(tab_path,tab_path_length);
+  mesont_lib_TPBInit();
+  // the table dimensions M and N follow three header lines of the file
+  int M = 0, N = 0;
+  std::ifstream in(tab_path);
+  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
+  std::string tmp;
+  for (int skip = 0; skip < 3; skip++) std::getline(in,tmp);
+  if (!(in >> M >> N)) error->all(FLERR,"Incorrect table file format");
+  in.close();
+  mesont_lib_TPMInit(M, N);
+  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_data(FILE *fp){
+  const int ntypes = atom->ntypes;  // one line per type: just the type index
+  for (int itype = 1; itype <= ntypes; ++itype) fprintf(fp,"%d\n",itype);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes all pairs to data file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_data_all(FILE *fp){
+  for (int row = 1, ntypes = atom->ntypes; row <= ntypes; ++row)
+    for (int col = row; col <= ntypes; ++col)  // i <= j pairs only
+      fprintf(fp,"%d %d %g\n",row,col,cut[row][col]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMESONTTPM::init_style(){
+  const int slot = neighbor->request(this,instance_me);
+  NeighRequest *req = neighbor->requests[slot];  // looked up after request()
+  req->half = false;    // a full list is needed ...
+  req->full = true;
+  req->ghost = true;    // ... including ghost nodes
+}
+
+void* PairMESONTTPM::extract(const char *str, int &){
+  if (!strcmp(str,"mesonttpm_Es_tot")) return &energy_s;  // totals: address
+  if (!strcmp(str,"mesonttpm_Eb_tot")) return &energy_b;
+  if (!strcmp(str,"mesonttpm_Et_tot")) return &energy_t;
+  if (!strcmp(str,"mesonttpm_Es")) return eatom_s;  // per-atom: array pointer
+  if (!strcmp(str,"mesonttpm_Eb")) return eatom_b;
+  if (!strcmp(str,"mesonttpm_Et")) return eatom_t;
+  return nullptr;  // unknown keyword
+}
diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h
index 704556d75e..c3d71ae953 100644
--- a/src/USER-MESONT/pair_mesont_tpm.h
+++ b/src/USER-MESONT/pair_mesont_tpm.h
@@ -1,98 +1,99 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-
-   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-
-PairStyle(mesont/tpm,PairMESONTTPM)
-
-#else
-
-#ifndef LMP_PAIR_MESONT_TPM_H
-#define LMP_PAIR_MESONT_TPM_H
-
-#include "pair.h"
-
-namespace LAMMPS_NS {
-
-class PairMESONTTPM : public Pair {
- public:
-  PairMESONTTPM(class LAMMPS *);
-  virtual ~PairMESONTTPM();
-  virtual void compute(int, int);
-  void settings(int, char **);
-  void coeff(int, char **);
-  double init_one(int, int);
-  void write_restart(FILE *);
-  void read_restart(FILE *);
-  void write_restart_settings(FILE *);
-  void read_restart_settings(FILE *);
-  void write_data(FILE *);
-  void write_data_all(FILE *);
-  virtual void init_style();
-
-  double energy_s;  // accumulated energies for stretching
-  double energy_b;  // accumulated energies for bending
-  double energy_t;  // accumulated energies for tube-tube interaction
-  double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values
-
- protected:
-  int BendingMode, TPMType;
-  char* tab_path;
-  int tab_path_length;
-  double cut_global;
-  double **cut;
-  static int instance_count;
-
-  virtual void allocate();
-  virtual void *extract(const char *, int &);
-};
-
-}
-
-#endif
-#endif
-
-/* ERROR/WARNING messages:
-
-E: Pair style mesont/tpm requires newton pair on
-
-newton_pair must be set to on
-
-E: The selected cutoff is too small for the current system
-
-cutoff must be increased.
-
-E: Illegal pair_style command
-
-Incorrect argument list in the style init.
-
-E: Incorrect table path
-
-Incorrect path to the table files.
-
-E: Incorrect BendingMode
-
-Self-explanatory.
-
-E: Incorrect TPMType
-
-Self-explanatory.
-
-E: Inconsistent input and potential table
-
-The tube diameter is inconsistent with the chirality specified
-during generation of the potential table.
-
-*/
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(mesont/tpm,PairMESONTTPM)
+
+#else
+
+#ifndef LMP_PAIR_MESONT_TPM_H
+#define LMP_PAIR_MESONT_TPM_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairMESONTTPM : public Pair {
+ public:
+  PairMESONTTPM(class LAMMPS *);
+  virtual ~PairMESONTTPM();
+  virtual void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  void write_data(FILE *);
+  void write_data_all(FILE *);
+  virtual void init_style();
+
+  double energy_s;  // accumulated energies for stretching
+  double energy_b;  // accumulated energies for bending
+  double energy_t;  // accumulated energies for tube-tube interaction
+  double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values
+
+ protected:
+  int BendingMode, TPMType;
+  char* tab_path;
+  int tab_path_length;
+  double cut_global;
+  double **cut;
+  static int instance_count;
+  int nmax;
+
+  virtual void allocate();
+  virtual void *extract(const char *, int &);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Pair style mesont/tpm requires newton pair on
+
+newton_pair must be set to on
+
+E: The selected cutoff is too small for the current system
+
+cutoff must be increased.
+
+E: Illegal pair_style command
+
+Incorrect argument list in the style init.
+
+E: Incorrect table path
+
+Incorrect path to the table files.
+
+E: Incorrect BendingMode
+
+Self-explanatory.
+
+E: Incorrect TPMType
+
+Self-explanatory.
+
+E: Inconsistent input and potential table
+
+The tube diameter is inconsistent with the chirality specified
+during generation of the potential table.
+
+*/

From 769e7a099511f15e857e079722520576bf7ad940 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 16:24:57 -0500
Subject: [PATCH 21/44] Revert "Add files via upload"

This reverts commit e6643979516195965c7261053878f4b88d9aaa2b.
---
 src/USER-MESONT/pair_mesont_tpm.cpp | 1611 +++++++++++++--------------
 src/USER-MESONT/pair_mesont_tpm.h   |  197 ++--
 2 files changed, 901 insertions(+), 907 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index a58f9892ed..9185786341 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -1,808 +1,803 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-
-   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
-------------------------------------------------------------------------- */
-
-#include "pair_mesont_tpm.h"
-#include "export_mesont.h"
-
-
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "memory.h"
-#include "error.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-
-#include <cstring>
-#include <vector>
-#include <cmath>
-
-#include <fstream>
-#include <sstream>
-#include <algorithm>
-
-using namespace LAMMPS_NS;
-
-//since LAMMPS is compiled with C++ 2003, define a substitution for std::array
-template<typename T, int N>
-class array2003{
-public:
-  T& operator[] (int idx){ return data[idx];};
-  const T& operator[] (int idx) const{ return data[idx];};
-private:
-  T data[N];
-};
-
-
-class MESONTList {
-public:
-  MESONTList(const Atom* atom, const NeighList* nblist, double rc2);
-  ~MESONTList() {};
-  //list of segments
-  const std::vector<array2003<int,2> >& get_segments() const;
-  //list of triplets
-  const std::vector<array2003<int,3> >& get_triplets() const;
-  //list of neighbor chains [start,end] for segments
-  //(use idx() to get real indexes)
-  const std::vector<std::vector<array2003<int,2> > >& get_nbs() const;
-  //convert idx from sorted representation to real idx
-  int get_idx(int idx) const;
-  //return list of indexes for conversion from sorted representation
-  const std::vector<int>& get_idx_list() const;
-  //convert idx from real idx to sorted representation
-  int get_idxb(int idx) const;
-  //return list of indexes for conversion to sorted representation
-  const std::vector<int>& get_idxb_list() const;
-  //check if the node is the end of the tube
-  bool is_end(int idx) const;
-
-  array2003<int, 2> get_segment(int idx) const;
-  array2003<int, 3> get_triplet(int idx) const;
-
-  static const int cnt_end = -1;
-  static const int domain_end = -2;
-  static const int not_cnt = -3;
-private:
-  std::vector<array2003<int, 2> > chain_list, segments;
-  std::vector<array2003<int, 3> > triplets;
-  std::vector<std::vector<array2003<int, 2> > > nb_chains;
-  std::vector<int> index_list, index_list_b;
-};
-
-//=============================================================================
-
-inline const std::vector<std::vector<array2003<int, 2> > > &
- MESONTList::get_nbs() const {
-  return nb_chains;
-}
-
-inline int MESONTList::get_idx(int idx) const {
-  return index_list[idx];
-}
-
-inline const std::vector<int>& MESONTList::get_idx_list() const {
-  return index_list;
-};
-
-
-inline int MESONTList::get_idxb(int idx) const {
-  return index_list_b[idx];
-}
-
-inline const std::vector<int>& MESONTList::get_idxb_list() const {
-  return index_list_b;
-};
-
-inline const std::vector<array2003<int, 2> > & MESONTList::get_segments()
- const {
-  return segments;
-}
-
-inline const std::vector<array2003<int, 3> > & MESONTList::get_triplets()
- const {
-  return triplets;
-}
-
-inline array2003<int, 2> MESONTList::get_segment(int idx) const {
-  array2003<int, 2> result;
-  result[0] = chain_list[idx][0];
-  result[1] = idx;
-  return result;
-}
-
-inline array2003<int, 3> MESONTList::get_triplet(int idx) const {
-  array2003<int, 3> result;
-  result[0] = chain_list[idx][0];
-  result[1] = idx;
-  result[2] = chain_list[idx][1];
-  return result;
-}
-
-inline bool MESONTList::is_end(int idx) const {
-  return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end;
-};
-
-template<typename T>
-void vector_union(std::vector<T>& v1, std::vector<T>& v2,
- std::vector<T>& merged) {
-  std::sort(v1.begin(), v1.end());
-  std::sort(v2.begin(), v2.end());
-  merged.reserve(v1.size() + v2.size());
-  typename std::vector<T>::iterator it1 = v1.begin();
-  typename std::vector<T>::iterator it2 = v2.begin();
-
-  while (it1 != v1.end() && it2 != v2.end()) {
-    if (*it1 < *it2) {
-      if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);
-        ++it1;
-    }
-    else {
-      if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
-      ++it2;
-    }
-  }
-  while (it1 != v1.end()) {
-    if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);
-    ++it1;
-  }
-
-  while (it2 != v2.end()) {
-  if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
-    ++it2;
-  }
-}
-
-MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){
-  if (atom == nullptr || nblist == nullptr) return;
-  //number of local atoms at the node
-  int nlocal = atom->nlocal;
-  //total number of atoms in the node and ghost shell
-  int nall = nblist->inum + nblist->gnum;
-  int ntot = atom->nlocal + atom->nghost;
-  tagint* const g_id = atom->tag;
-  tagint** const bonds = atom->bond_nt;
-  tagint* const chain_id = atom->molecule;
-  int* ilist = nblist->ilist;
-
-  //convert bonds to local id representation
-  array2003<int, 2> tmp_arr;
-  tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt;
-  chain_list.resize(ntot, tmp_arr);
-  for (int ii = 0; ii < nall; ii++) {
-    int i = ilist[ii];
-    chain_list[i][0] = domain_end;
-    chain_list[i][1] = domain_end;
-  }
-  for (int ii = 0; ii < nall; ii++) {
-    int i = ilist[ii];
-    int nnb = nblist->numneigh[i];
-    for (int m = 0; m < 2; m++)
-      if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end;
-    for (int j = 0; j < nnb; j++) {
-      int nb = nblist->firstneigh[i][j];
-      if (bonds[i][0] == g_id[nb]){
-        chain_list[i][0] = nb;
-        chain_list[nb][1] = i;
-        break;
-      }
-    }
-  }
-
-  //reorder chains: index list
-  //list of indexes for conversion FROM reordered representation
-  index_list.reserve(nall);
-  index_list_b.resize(ntot, -1); // convert index TO reordered representation
-  for (int i = 0; i < ntot; i++) {
-    if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) {
-      index_list.push_back(i);
-      index_list_b[i] = index_list.size() - 1;
-      int idx = i;
-      while (1) {
-        idx = chain_list[idx][1];
-        if (idx == cnt_end || idx == domain_end) break;
-        else index_list.push_back(idx);
-        index_list_b[idx] = index_list.size() - 1;
-      }
-    }
-  }
-
-  //segment list
-  for (int i = 0; i < nlocal; i++) {
-    if (chain_list[i][0] == not_cnt) continue;
-    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     g_id[i] < g_id[chain_list[i][0]]){
-      array2003<int, 2> tmp_c;
-      tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
-      segments.push_back(tmp_c);
-    }
-    if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
-     g_id[i] < g_id[chain_list[i][1]]){
-      array2003<int, 2> tmp_c;
-       tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
-       segments.push_back(tmp_c);
-    }
-  }
-  int nbonds = segments.size();
-
-  //triplets
-  for (int i = 0; i < nlocal; i++){
-    if (chain_list[i][0] == not_cnt) continue;
-    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end)
-      triplets.push_back(get_triplet(i));
-  }
-
-  //segment neighbor list
-  nb_chains.resize(nbonds);
-  std::vector<int> nb_list_i[2], nb_list;
-  for (int i = 0; i < nbonds; i++) {
-    //union of nb lists
-    for (int m = 0; m < 2; m++) {
-      nb_list_i[m].resize(0);
-      int idx = segments[i][m];
-      if (idx >= nlocal) continue;
-      int nnb = nblist->numneigh[idx];
-      for (int j = 0; j < nnb; j++) {
-        int jdx = nblist->firstneigh[idx][j];
-        //no self interactions for nbs within the same tube
-        if (chain_id[jdx] == chain_id[idx] &&
-         std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue;
-        nb_list_i[m].push_back(index_list_b[jdx]);
-      }
-    }
-    vector_union(nb_list_i[0], nb_list_i[1], nb_list);
-
-    int nnb = nb_list.size();
-    if (nnb > 0) {
-      int idx_s = nb_list[0];
-      for (int j = 0; j < nnb; j++) {
-        //if nodes are not continuous in the sorted representation
-        //or represent chain ends, create a new neighbor chain
-        int idx_next = chain_list[index_list[nb_list[j]]][1];
-        if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) ||
-         (idx_next == cnt_end) || (idx_next == domain_end)) {
-          array2003<int, 2> chain;
-          chain[0] = idx_s;
-          chain[1] = nb_list[j];
-          //make sure that segments having at least one node
-          //in the neighbor list are included
-          int idx0 = index_list[chain[0]]; // real id of the ends
-          int idx1 = index_list[chain[1]];
-          if (chain_list[idx0][0] != cnt_end &&
-           chain_list[idx0][0] != domain_end) chain[0] -= 1;
-          if (chain_list[idx1][1] != cnt_end &&
-           chain_list[idx1][1] != domain_end) chain[1] += 1;
-          if(chain[0] != chain[1]) nb_chains[i].push_back(chain);
-          idx_s = (j == nnb - 1) ? -1 : nb_list[j + 1];
-        }
-      }
-    }
-    nb_list.resize(0);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-// the cutoff distance between walls of tubes
-static const double TPBRcutoff  = 3.0*3.4;
-int PairMESONTTPM::instance_count = 0;
-/* ---------------------------------------------------------------------- */
-
-PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
-  writedata=1;
-  BendingMode = 0;  // Harmonic bending model
-  TPMType = 0;      // Inter-tube segment-segment interaction
-  tab_path = nullptr;
-  tab_path_length = 0;
-
-  eatom_s = nullptr;
-  eatom_b = nullptr;
-  eatom_t = nullptr;
-  nmax = 0;
-  instance_count++;
-  if(instance_count > 1) error->all(FLERR,
-   "only a single instance of mesont/tpm pair style can be created");
-}
-
-/* ---------------------------------------------------------------------- */
-
-PairMESONTTPM::~PairMESONTTPM()
-{
-  if (allocated) {
-    memory->destroy(setflag);
-    memory->destroy(cutsq);
-    memory->destroy(cut);
-
-    memory->destroy(eatom_s);
-    memory->destroy(eatom_b);
-    memory->destroy(eatom_t);
-  }
-  instance_count--;
-  if (tab_path != nullptr) memory->destroy(tab_path);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairMESONTTPM::compute(int eflag, int vflag){
-  // set per atom values and accumulators
-  // reallocate per-atom arrays if necessary
-  ev_init(eflag,vflag);
-  if (atom->nmax > nmax) {
-    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
-    memory->destroy(eatom_b);
-    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
-    memory->destroy(eatom_t);
-    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
-    nmax = atom->nmax;
-  }
-  //total number of atoms in the node and ghost shell
-  int nall = list->inum + list->gnum;
-  int ntot = atom->nlocal + atom->nghost;
-  int newton_pair = force->newton_pair;
-  if(!newton_pair)
-   error->all(FLERR,"Pair style mesont/tpm requires newton pair on");
-
-  double **x = atom->x;
-  double **f = atom->f;
-  double *r = atom->radius;
-  double *l = atom->length;
-  int *buckling = atom->buckling;
-  tagint *g_id = atom->tag;
-
-  //check if cutoff is chosen correctly
-  double RT = mesont_lib_get_R();
-  double Lmax = 0.0;
-  for (int ii = 0; ii < list->inum; ii++) {
-    int i = list->ilist[ii];
-    if (Lmax < l[i]) Lmax = l[i];
-  }
-  double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
-   std::pow((2.0*RT + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
-    std::stringstream err;
-    err << "The selected cutoff is too small for the current system : " <<
-     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
-     ", Rcut_min = " << Rcut_min;
-    error->all(FLERR, err.str().c_str());
-  }
-
-  //generate bonds and chain nblist
-  MESONTList ntlist(atom, list, cut_global*cut_global);
-
-  //reorder data to make it contiguous within tubes
-  //and compatible with Fortran functions
-  std::vector<double> x_sort(3*nall), f_sort(3*nall), s_sort(9*nall);
-  std::vector<double> u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall);
-  std::vector<int> b_sort(nall);
-  for (int i = 0; i < nall; i++){
-    int idx = ntlist.get_idx(i);
-    for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j];
-    b_sort[i] = buckling[idx];
-  }
-
-  //bending potential
-  int n_triplets = ntlist.get_triplets().size();
-  for (int i = 0; i < n_triplets; i++) {
-    const array2003<int,3>& t = ntlist.get_triplets()[i];
-    //idx of nodes of a triplet in sorted representation
-    int idx_s0 = ntlist.get_idxb(t[0]);
-    int idx_s1 = ntlist.get_idxb(t[1]);
-    int idx_s2 = ntlist.get_idxb(t[2]);
-
-    double* X1 = &(x_sort[3*idx_s0]);
-    double* X2 = &(x_sort[3*idx_s1]);
-    double* X3 = &(x_sort[3*idx_s2]);
-    double& U1b = u_tb_sort[idx_s0];
-    double& U2b = u_tb_sort[idx_s1];
-    double& U3b = u_tb_sort[idx_s2];
-    double* F1 = &(f_sort[3*idx_s0]);
-    double* F2 = &(f_sort[3*idx_s1]);
-    double* F3 = &(f_sort[3*idx_s2]);
-    double* S1 = &(s_sort[9*idx_s0]);
-    double* S2 = &(s_sort[9*idx_s1]);
-    double* S3 = &(s_sort[9*idx_s2]);
-    double& R123 = r[t[1]];
-    double& L123 = l[t[1]];
-    int& BBF2 = b_sort[idx_s1];
-
-    mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3,
-     X1, X2, X3, R123, L123, BBF2);
-  }
-
-  //share new values of buckling
-  if (BendingMode == 1){
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      buckling[idx] = b_sort[i];
-    }
-    comm->forward_comm_pair(this);
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      b_sort[i] = buckling[idx];
-    }
-  }
-
-  //segment-segment and segment-tube interactions
-  int n_segments = ntlist.get_segments().size();
-  double Rmax = 0.0;
-  Lmax = 0.0;
-  for (int i = 0; i < n_segments; i++) {
-    const array2003<int,2>& s = ntlist.get_segments()[i];
-    //idx of a segment end 1 in sorted representation
-    int idx_s0 = ntlist.get_idxb(s[0]);
-    //idx of a segment end 2 in sorted representation
-    int idx_s1 = ntlist.get_idxb(s[1]);
-    double* X1 = &(x_sort[3*idx_s0]);
-    double* X2 = &(x_sort[3*idx_s1]);
-    double length = std::sqrt(std::pow(X1[0]-X2[0],2) +
-     std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2));
-    if (length > Lmax) Lmax = length;
-    double& U1t = u_tt_sort[idx_s0];
-    double& U2t = u_tt_sort[idx_s1];
-    double& U1s = u_ts_sort[idx_s0];
-    double& U2s = u_ts_sort[idx_s1];
-    double* F1 = &(f_sort[3*idx_s0]);
-    double* F2 = &(f_sort[3*idx_s1]);
-    double* S1 = &(s_sort[9*idx_s0]);
-    double* S2 = &(s_sort[9*idx_s1]);
-    double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12;
-    if (std::abs(R12 - RT) > 1e-3)
-        error->all(FLERR,"Inconsistent input and potential table");
-    //assume that the length of the segment is defined by the node with
-    //smallest global id
-    double L12 = (g_id[s[0]] > g_id[s[1]]) ? l[s[1]] : l[s[0]];
-    mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2,
-     R12, L12);
-
-    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){
-      //id of the beginning and end of the chain in the sorted representation
-      const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
-      int N = chain[1] - chain[0] + 1;  //number of elements in the chain
-      int end1 = ntlist.get_idx(chain[0]);  //chain ends (real representation)
-      int end2 = ntlist.get_idx(chain[1]);
-      double* X = &(x_sort[3*chain[0]]);
-      double* Ut = &(u_tt_sort[chain[0]]);
-      double* F = &(f_sort[3*chain[0]]);
-      double* S = &(s_sort[9*chain[0]]);
-      double R = r[end1];
-      int* BBF = &(b_sort[chain[0]]);
-      int E1 = ntlist.is_end(end1);
-      int E2 = ntlist.is_end(end2);
-
-      int Ee = 0;
-      double* Xe = X; double* Fe = F; double* Se = S;
-      if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end &&
-       ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] ==
-       MESONTList::cnt_end){
-        Ee = 1;
-        int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]);
-        Xe = &(x_sort[3*idx]);
-        Fe = &(f_sort[3*idx]);
-        Se = &(s_sort[9*idx]);
-      }
-      else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end &&
-       ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] ==
-       MESONTList::cnt_end){
-        Ee = 2;
-        int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]);
-        Xe = &(x_sort[3*idx]);
-        Fe = &(f_sort[3*idx]);
-        Se = &(s_sort[9*idx]);
-      }
-
-      mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S,
-       Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType);
-    }
-  }
-
-  //check if cutoff is chosen correctly
-  Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
-   std::pow((2.0*Rmax + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
-    std::stringstream err;
-    err << "The selected cutoff is too small for the current system : " <<
-     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
-     ", Rcut_min = " << Rcut_min;
-    error->all(FLERR, err.str().c_str());
-  }
-
-  //convert from sorted representation
-  for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
-      buckling[idx] = b_sort[i];
-  }
-  if(eflag){
-    eng_vdwl = 0.0; energy_s = 0.0;
-    energy_b = 0.0; energy_t = 0.0;
-    for (int i = 0; i < ntot; i++){
-      eatom[i] = 0.0; eatom_s[i] = 0.0;
-      eatom_b[i] = 0.0; eatom_t[i] = 0.0;
-    }
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      eatom_s[idx] = u_ts_sort[i];
-      eatom_b[idx] = u_tb_sort[i];
-      eatom_t[idx] = u_tt_sort[i];
-      eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
-      energy_s += u_ts_sort[i];
-      energy_b += u_tb_sort[i];
-      energy_t += u_tt_sort[i];
-    }
-    eng_vdwl = energy_s + energy_b + energy_t;
-  }
-  if(vflag){
-    for (int i = 0; i < 6; i++) virial[i] = 0.0;
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      virial[0] += s_sort[9*i+0]; //xx
-      virial[1] += s_sort[9*i+4]; //yy
-      virial[2] += s_sort[9*i+8]; //zz
-      virial[3] += s_sort[9*i+1]; //xy
-      virial[4] += s_sort[9*i+2]; //xz
-      virial[5] += s_sort[9*i+5]; //yz
-    }
-  }
-  int vflag_atom = vflag & 4;
-  if(vflag_atom){
-    for (int i = 0; i < ntot; i++)
-      for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
-    for (int i = 0; i < nall; i++){
-      int idx = ntlist.get_idx(i);
-      vatom[idx][0] = s_sort[9*i+0]; //xx
-      vatom[idx][1] = s_sort[9*i+4]; //yy
-      vatom[idx][2] = s_sort[9*i+8]; //zz
-      vatom[idx][3] = s_sort[9*i+1]; //xy
-      vatom[idx][4] = s_sort[9*i+2]; //xz
-      vatom[idx][5] = s_sort[9*i+5]; //yz
-    }
-  }
-
-}
-
-/* ----------------------------------------------------------------------
-   allocate all arrays
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::allocate(){
-  allocated = 1;
-  int n = atom->ntypes;
-
-  memory->create(setflag,n+1,n+1,"pair:setflag");
-  for (int i = 1; i <= n; i++)
-    for (int j = i; j <= n; j++)
-      setflag[i][j] = 0;
-
-  memory->create(cutsq,n+1,n+1,"pair:cutsq");
-  memory->create(cut,n+1,n+1,"pair:cut");
-}
-
-/* ----------------------------------------------------------------------
-   global settings
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::settings(int narg, char **arg){
-  if ((narg == 0) || (narg > 4))
-    error->all(FLERR,"Illegal pair_style command");
-  cut_global = utils::numeric(FLERR,arg[0],false,lmp);
-
-  // reset cutoffs that have been explicitly set
-  if (allocated) {
-    int i,j;
-    for (i = 1; i <= atom->ntypes; i++)
-      for (j = i+1; j <= atom->ntypes; j++)
-        cut[i][j] = cut_global;
-  }
-  std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs";
-  tab_path_length = TPMAFile.length();
-  if (tab_path != nullptr) memory->destroy(tab_path);
-  //c_str returns '\0' terminated string
-  memory->create(tab_path,tab_path_length+1,"pair:path");
-  std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1);
-  mesont_lib_SetTablePath(tab_path, tab_path_length);
-
-  if (narg > 2) {
-    BendingMode = utils::numeric(FLERR,arg[2],false,lmp);
-    if ((BendingMode < 0) || (BendingMode > 1))
-      error->all(FLERR,"Incorrect BendingMode");
-  }
-  if (narg > 3) {
-    TPMType = utils::numeric(FLERR,arg[3],false,lmp);
-    if ((TPMType < 0) || (TPMType > 1))
-      error->all(FLERR,"Incorrect TPMType");
-  }
-
-  mesont_lib_TPBInit();
-  int M, N;
-  std::ifstream in(TPMAFile);
-  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
-  std::string tmp;
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  in >> M >> N;
-  in.close();
-  mesont_lib_TPMInit(M, N);
-  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more type pairs
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::coeff(int narg, char **arg){
-  if ((narg < 2) || (narg > 3))
-    error->all(FLERR,"Incorrect args for pair coefficients");
-
-  if (!allocated) allocate();
-
-  int ilo,ihi,jlo,jhi;
-  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
-  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
-
-  double cut_one = cut_global;
-  if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp);
-
-  int count = 0;
-  for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo,i); j <= jhi; j++) {
-      cut[i][j] = cut_one;
-      setflag[i][j] = 1;
-      count++;
-    }
-  }
-
-  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
-}
-
-/* ----------------------------------------------------------------------
-   init for one type pair i,j and corresponding j,i
-------------------------------------------------------------------------- */
-
-double PairMESONTTPM::init_one(int i, int j){
-  if (setflag[i][j] == 0) {
-    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
-  }
-
-  return cut[i][j];
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_restart(FILE *fp){
-  write_restart_settings(fp);
-
-  int i,j;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      fwrite(&setflag[i][j],sizeof(int),1,fp);
-      if (setflag[i][j]) {
-        fwrite(&cut[i][j],sizeof(double),1,fp);
-      }
-    }
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::read_restart(FILE *fp){
-  read_restart_settings(fp);
-  allocate();
-
-  int i,j;
-  int me = comm->me;
-  for (i = 1; i <= atom->ntypes; i++)
-    for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
-      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
-      if (setflag[i][j]) {
-        if (me == 0) {
-          fread(&cut[i][j],sizeof(double),1,fp);
-        }
-        MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
-      }
-    }
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to restart file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_restart_settings(FILE *fp){
-  fwrite(&BendingMode,sizeof(int),1,fp);
-  fwrite(&TPMType,sizeof(int),1,fp);
-  fwrite(&cut_global,sizeof(double),1,fp);
-  fwrite(&tab_path_length,sizeof(int),1,fp);
-  fwrite(tab_path,tab_path_length+1,1,fp);
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 reads from restart file, bcasts
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::read_restart_settings(FILE *fp){
-  int me = comm->me;
-  if (me == 0) {
-    fread(&BendingMode,sizeof(int),1,fp);
-    fread(&TPMType,sizeof(int),1,fp);
-    fread(&cut_global,sizeof(double),1,fp);
-    fread(&tab_path_length,sizeof(int),1,fp);
-  }
-  MPI_Bcast(&BendingMode,1,MPI_INT,0,world);
-  MPI_Bcast(&TPMType,1,MPI_INT,0,world);
-  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
-  MPI_Bcast(&tab_path_length,1,MPI_INT,0,world);
-
-  if (tab_path != nullptr) memory->destroy(tab_path);
-  memory->create(tab_path,tab_path_length+1,"pair:path");
-  if (me == 0) fread(tab_path,tab_path_length+1,1,fp);
-  MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world);
-  mesont_lib_SetTablePath(tab_path,tab_path_length);
-  mesont_lib_TPBInit();
-  int M, N;
-  std::ifstream in(tab_path);
-  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
-  std::string tmp;
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  std::getline(in,tmp);
-  in >> M >> N;
-  in.close();
-  mesont_lib_TPMInit(M, N);
-  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes to data file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_data(FILE *fp){
-  for (int i = 1; i <= atom->ntypes; i++)
-    fprintf(fp,"%d\n",i);
-}
-
-/* ----------------------------------------------------------------------
-   proc 0 writes all pairs to data file
-------------------------------------------------------------------------- */
-
-void PairMESONTTPM::write_data_all(FILE *fp){
-  for (int i = 1; i <= atom->ntypes; i++)
-    for (int j = i; j <= atom->ntypes; j++)
-      fprintf(fp,"%d %d %g\n",i,j,cut[i][j]);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void PairMESONTTPM::init_style(){
-  //make sure that a full list is created (including ghost nodes)
-  int r = neighbor->request(this,instance_me);
-  neighbor->requests[r]->half = false;
-  neighbor->requests[r]->full = true;
-  neighbor->requests[r]->ghost = true;
-}
-
-void* PairMESONTTPM::extract(const char *str, int &){
-  if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s;
-  else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b;
-  else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t;
-  else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s;
-  else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b;
-  else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t;
-  else return nullptr;
-};
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://lammps.sandia.gov/, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
+------------------------------------------------------------------------- */
+
+#include "pair_mesont_tpm.h"
+#include "export_mesont.h"
+
+
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+
+#include <cstring>
+#include <vector>
+#include <cmath>
+
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+
+using namespace LAMMPS_NS;
+
+//since LAMMPS is compiled with C++ 2003, define a substitution for std::array
+template<typename T, int N>
+class array2003{  // minimal fixed-size array holding N elements of type T
+public:
+  T& operator[] (int idx){ return data[idx];};  // mutable element access, no bounds check
+  const T& operator[] (int idx) const{ return data[idx];};  // read-only element access, no bounds check
+private:
+  T data[N];  // in-object storage; no constructor is declared, so elements are default-initialized
+};
+
+
+class MESONTList {
+public:
+  MESONTList(const Atom* atom, const NeighList* nblist, double rc2);  // builds every list below; the rc2 argument is not used by the definition
+  ~MESONTList() {};
+  //list of segments (pairs of real node indexes)
+  const std::vector<array2003<int,2> >& get_segments() const;
+  //list of triplets (previous node, node, next node as real indexes)
+  const std::vector<array2003<int,3> >& get_triplets() const;
+  //list of neighbor chains [start,end] for segments, in sorted representation
+  //(use get_idx() to get real indexes)
+  const std::vector<std::vector<array2003<int,2> > >& get_nbs() const;
+  //convert idx from sorted representation to real idx
+  int get_idx(int idx) const;
+  //return list of indexes for conversion from sorted representation
+  const std::vector<int>& get_idx_list() const;
+  //convert idx from real idx to sorted representation
+  int get_idxb(int idx) const;
+  //return list of indexes for conversion to sorted representation
+  const std::vector<int>& get_idxb_list() const;
+  //check if the node is the end of the tube
+  bool is_end(int idx) const;
+
+  array2003<int, 2> get_segment(int idx) const;  // {chain_list[idx][0], idx}
+  array2003<int, 3> get_triplet(int idx) const;  // {chain_list[idx][0], idx, chain_list[idx][1]}
+
+  static const int cnt_end = -1;     // chain_list marker copied from a bond entry equal to cnt_end
+  static const int domain_end = -2;  // chain_list marker for a listed atom whose bonded partner was not found
+  static const int not_cnt = -3;     // chain_list marker for atoms that are not in the neighbor list's ilist
+private:
+  std::vector<array2003<int, 2> > chain_list, segments;  // chain_list: per-atom linked real indexes or one of the markers above
+  std::vector<array2003<int, 3> > triplets;
+  std::vector<std::vector<array2003<int, 2> > > nb_chains;  // one vector of [start,end] chains per segment
+  std::vector<int> index_list, index_list_b;  // sorted->real and real->sorted index maps
+};
+
+//=============================================================================
+
+inline const std::vector<std::vector<array2003<int, 2> > > &
+ MESONTList::get_nbs() const {
+  return nb_chains;  // entry i holds the [start,end] neighbor chains of segment i
+}
+
+inline int MESONTList::get_idx(int idx) const {
+  return index_list[idx];  // sorted position -> real index; no bounds check
+}
+
+inline const std::vector<int>& MESONTList::get_idx_list() const {
+  return index_list;  // whole sorted -> real index table
+};
+
+
+inline int MESONTList::get_idxb(int idx) const {
+  return index_list_b[idx];  // real index -> sorted position; entries start at -1 and stay -1 if never assigned
+}
+
+inline const std::vector<int>& MESONTList::get_idxb_list() const {
+  return index_list_b;  // whole real -> sorted index table
+};
+
+inline const std::vector<array2003<int, 2> > & MESONTList::get_segments()
+ const {
+  return segments;  // node pairs collected by the constructor, stored as real indexes
+}
+
+inline const std::vector<array2003<int, 3> > & MESONTList::get_triplets()
+ const {
+  return triplets;  // node triples collected by the constructor, stored as real indexes
+}
+
+inline array2003<int, 2> MESONTList::get_segment(int idx) const {
+  array2003<int, 2> result;  // returned by value
+  result[0] = chain_list[idx][0];  // linked node stored in slot 0 (may be a negative marker)
+  result[1] = idx;  // the queried node itself
+  return result;
+}
+
+inline array2003<int, 3> MESONTList::get_triplet(int idx) const {
+  array2003<int, 3> result;  // returned by value
+  result[0] = chain_list[idx][0];  // linked node stored in slot 0 (may be a negative marker)
+  result[1] = idx;  // the queried node itself
+  result[2] = chain_list[idx][1];  // linked node stored in slot 1 (may be a negative marker)
+  return result;
+}
+
+inline bool MESONTList::is_end(int idx) const {
+  return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end;  // true if either link is the cnt_end marker
+};
+
+template<typename T>  // appends the ascending union of v1 and v2 to merged
+void vector_union(std::vector<T>& v1, std::vector<T>& v2,
+ std::vector<T>& merged) {
+  std::sort(v1.begin(), v1.end());  // note: both inputs are sorted in place
+  std::sort(v2.begin(), v2.end());
+  merged.reserve(v1.size() + v2.size());  // merged is not cleared here
+  typename std::vector<T>::iterator it1 = v1.begin();
+  typename std::vector<T>::iterator it2 = v2.begin();
+
+  while (it1 != v1.end() && it2 != v2.end()) {  // two-way merge of the sorted inputs
+    if (*it1 < *it2) {
+      if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);  // push only values greater than the last one kept
+        ++it1;  // executed unconditionally inside this branch despite the indentation
+    }
+    else {
+      if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
+      ++it2;
+    }
+  }
+  while (it1 != v1.end()) {  // drain what is left of v1
+    if (merged.empty() || merged.back() < *it1) merged.push_back(*it1);
+    ++it1;
+  }
+
+  while (it2 != v2.end()) {  // drain what is left of v2
+  if (merged.empty() || merged.back() < *it2) merged.push_back(*it2);
+    ++it2;
+  }
+}
+
+MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){
+  if (atom == nullptr || nblist == nullptr) return;  // all lists stay empty
+  //number of local atoms at the node
+  int nlocal = atom->nlocal;
+  //number of atoms covered by the neighbor list (inum + gnum)
+  int nall = nblist->inum + nblist->gnum;
+  int ntot = atom->nlocal + atom->nghost;  // local + ghost atoms; sizes the per-atom tables
+  tagint* const g_id = atom->tag;
+  tagint** const bonds = atom->bond_nt;
+  tagint* const chain_id = atom->molecule;
+  int* ilist = nblist->ilist;
+
+  //convert bonds to local id representation
+  array2003<int, 2> tmp_arr;
+  tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt;
+  chain_list.resize(ntot, tmp_arr);  // every atom starts as not_cnt
+  for (int ii = 0; ii < nall; ii++) {  // atoms present in ilist default to domain_end
+    int i = ilist[ii];
+    chain_list[i][0] = domain_end;
+    chain_list[i][1] = domain_end;
+  }
+  for (int ii = 0; ii < nall; ii++) {
+    int i = ilist[ii];
+    int nnb = nblist->numneigh[i];
+    for (int m = 0; m < 2; m++)
+      if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end;
+    for (int j = 0; j < nnb; j++) {  // find the neighbor whose tag equals bonds[i][0]
+      int nb = nblist->firstneigh[i][j];
+      if (bonds[i][0] == g_id[nb]){
+        chain_list[i][0] = nb;  // link both directions: slot 0 of i, slot 1 of nb
+        chain_list[nb][1] = i;
+        break;
+      }
+    }
+  }
+
+  //reorder chains: index list
+  //list of indexes for conversion FROM reordered representation
+  index_list.reserve(nall);
+  index_list_b.resize(ntot, -1); // convert index TO reordered representation
+  for (int i = 0; i < ntot; i++) {
+    if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) {  // i starts a chain
+      index_list.push_back(i);
+      index_list_b[i] = index_list.size() - 1;
+      int idx = i;
+      while (1) {  // follow slot-1 links until a cnt_end/domain_end marker
+        idx = chain_list[idx][1];
+        if (idx == cnt_end || idx == domain_end) break;
+        else index_list.push_back(idx);
+        index_list_b[idx] = index_list.size() - 1;  // reached only when idx was pushed above
+      }
+    }
+  }
+
+  //segment list
+  for (int i = 0; i < nlocal; i++) {
+    if (chain_list[i][0] == not_cnt) continue;
+    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
+     g_id[i] < g_id[chain_list[i][0]]){  // recorded from the end with the smaller tag
+      array2003<int, 2> tmp_c;
+      tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
+      segments.push_back(tmp_c);
+    }
+    if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
+     g_id[i] < g_id[chain_list[i][1]]){
+      array2003<int, 2> tmp_c;
+       tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
+       segments.push_back(tmp_c);
+    }
+  }
+  int nbonds = segments.size();
+
+  //triplets
+  for (int i = 0; i < nlocal; i++){
+    if (chain_list[i][0] == not_cnt) continue;
+    if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
+     chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end)  // both links resolved
+      triplets.push_back(get_triplet(i));
+  }
+
+  //segment neighbor list
+  nb_chains.resize(nbonds);
+  std::vector<int> nb_list_i[2], nb_list;  // scratch buffers reused for every segment
+  for (int i = 0; i < nbonds; i++) {
+    //union of nb lists
+    for (int m = 0; m < 2; m++) {
+      nb_list_i[m].resize(0);
+      int idx = segments[i][m];
+      if (idx >= nlocal) continue;  // only local segment ends contribute neighbors
+      int nnb = nblist->numneigh[idx];
+      for (int j = 0; j < nnb; j++) {
+        int jdx = nblist->firstneigh[idx][j];
+        //no self interactions for nbs within the same tube
+        if (chain_id[jdx] == chain_id[idx] &&
+         std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue;
+        nb_list_i[m].push_back(index_list_b[jdx]);  // stored in sorted representation
+      }
+    }
+    vector_union(nb_list_i[0], nb_list_i[1], nb_list);  // sorted, duplicate-free merge
+
+    int nnb = nb_list.size();
+    if (nnb > 0) {
+      int idx_s = nb_list[0];  // start of the chain currently being built
+      for (int j = 0; j < nnb; j++) {
+        //if nodes are not continuous in the sorted representation
+        //or represent chain ends, create a new neighbor chain
+        int idx_next = chain_list[index_list[nb_list[j]]][1];
+        if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) ||
+         (idx_next == cnt_end) || (idx_next == domain_end)) {
+          array2003<int, 2> chain;
+          chain[0] = idx_s;
+          chain[1] = nb_list[j];
+          //make sure that segments having at least one node
+          //in the neighbor list are included
+          int idx0 = index_list[chain[0]]; // real id of the ends
+          int idx1 = index_list[chain[1]];
+          if (chain_list[idx0][0] != cnt_end &&
+           chain_list[idx0][0] != domain_end) chain[0] -= 1;  // widen by one node at the start
+          if (chain_list[idx1][1] != cnt_end &&
+           chain_list[idx1][1] != domain_end) chain[1] += 1;  // widen by one node at the end
+          if(chain[0] != chain[1]) nb_chains[i].push_back(chain);  // single-node chains are dropped
+          idx_s = (j == nnb - 1) ? -1 : nb_list[j + 1];
+        }
+      }
+    }
+    nb_list.resize(0);  // vector_union appends, so reset before the next segment
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// the cutoff distance between walls of tubes
+static const double TPBRcutoff  = 3.0*3.4;
+int PairMESONTTPM::instance_count = 0;
+/* ---------------------------------------------------------------------- */
+
+PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
+  writedata=1;
+  BendingMode = 0;  // Harmonic bending model
+  TPMType = 0;      // Inter-tube segment-segment interaction
+  tab_path = nullptr;  // table path buffer is created in settings() / read_restart_settings()
+  tab_path_length = 0;
+
+  eatom_s = nullptr;  // per-atom energy arrays are created on first use in compute()
+  eatom_b = nullptr;
+  eatom_t = nullptr;
+  instance_count++;  // static counter shared by all instances
+  if(instance_count > 1) error->all(FLERR,
+   "only a single instance of mesont/tpm pair style can be created");
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairMESONTTPM::~PairMESONTTPM()
+{
+  if (allocated) {  // arrays below are released only after allocate() has run
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(cut);
+
+    memory->destroy(eatom_s);
+    memory->destroy(eatom_b);
+    memory->destroy(eatom_t);
+  }
+  instance_count--;  // lets a new instance be created afterwards
+  if (tab_path != nullptr) memory->destroy(tab_path);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMESONTTPM::compute(int eflag, int vflag){  // bending, stretching and tube-tube forces, energies and virials
+  ev_init(eflag,vflag);
+  //number of atoms covered by the neighbor list (inum + gnum)
+  int nall = list->inum + list->gnum;
+  int ntot = atom->nlocal + atom->nghost;
+  int newton_pair = force->newton_pair;
+  if(!newton_pair)
+   error->all(FLERR,"Pair style mesont/tpm requires newton pair on");
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *r = atom->radius;
+  double *l = atom->length;
+  int *buckling = atom->buckling;
+  tagint *g_id = atom->tag;
+
+  //check if cutoff is chosen correctly
+  double RT = mesont_lib_get_R();  // tube radius reported by the potential library
+  double Lmax = 0.0;
+  for (int ii = 0; ii < list->inum; ii++) {
+    int i = list->ilist[ii];
+    if (Lmax < l[i]) Lmax = l[i];
+  }
+  double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
+   std::pow((2.0*RT + TPBRcutoff),2)));
+  if (cut_global < Rcut_min){
+    std::stringstream err;
+    err << "The selected cutoff is too small for the current system : " <<
+     "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
+     ", Rcut_min = " << Rcut_min;
+    error->all(FLERR, err.str().c_str());
+  }
+
+  //generate bonds and chain nblist
+  MESONTList ntlist(atom, list, cut_global*cut_global);
+
+  //reorder data to make it contiguous within tubes
+  //and compatible with Fortran functions
+  std::vector<double> x_sort(3*nall), f_sort(3*nall), s_sort(9*nall);  // positions, forces, 3x3 stress per node
+  std::vector<double> u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall);  // stretching, bending, tube-tube energies
+  std::vector<int> b_sort(nall);
+  for (int i = 0; i < nall; i++){
+    int idx = ntlist.get_idx(i);
+    for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j];
+    b_sort[i] = buckling[idx];
+  }
+
+  //bending potential
+  int n_triplets = ntlist.get_triplets().size();
+  for (int i = 0; i < n_triplets; i++) {
+    const array2003<int,3>& t = ntlist.get_triplets()[i];
+    //idx of nodes of a triplet in sorted representation
+    int idx_s0 = ntlist.get_idxb(t[0]);
+    int idx_s1 = ntlist.get_idxb(t[1]);
+    int idx_s2 = ntlist.get_idxb(t[2]);
+
+    double* X1 = &(x_sort[3*idx_s0]);
+    double* X2 = &(x_sort[3*idx_s1]);
+    double* X3 = &(x_sort[3*idx_s2]);
+    double& U1b = u_tb_sort[idx_s0];
+    double& U2b = u_tb_sort[idx_s1];
+    double& U3b = u_tb_sort[idx_s2];
+    double* F1 = &(f_sort[3*idx_s0]);
+    double* F2 = &(f_sort[3*idx_s1]);
+    double* F3 = &(f_sort[3*idx_s2]);
+    double* S1 = &(s_sort[9*idx_s0]);
+    double* S2 = &(s_sort[9*idx_s1]);
+    double* S3 = &(s_sort[9*idx_s2]);
+    double& R123 = r[t[1]];  // radius and length are taken from the middle node
+    double& L123 = l[t[1]];
+    int& BBF2 = b_sort[idx_s1];
+
+    mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3,
+     X1, X2, X3, R123, L123, BBF2);
+  }
+
+  //share new values of buckling
+  if (BendingMode == 1){
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      buckling[idx] = b_sort[i];
+    }
+    comm->forward_comm_pair(this);
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      b_sort[i] = buckling[idx];
+    }
+  }
+
+  //segment-segment and segment-tube interactions
+  int n_segments = ntlist.get_segments().size();
+  double Rmax = 0.0;  // largest segment radius seen in the loop below
+  Lmax = 0.0;  // now tracks the largest actual segment length
+  for (int i = 0; i < n_segments; i++) {
+    const array2003<int,2>& s = ntlist.get_segments()[i];
+    //idx of a segment end 1 in sorted representation
+    int idx_s0 = ntlist.get_idxb(s[0]);
+    //idx of a segment end 2 in sorted representation
+    int idx_s1 = ntlist.get_idxb(s[1]);
+    double* X1 = &(x_sort[3*idx_s0]);
+    double* X2 = &(x_sort[3*idx_s1]);
+    double length = std::sqrt(std::pow(X1[0]-X2[0],2) +
+     std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2));
+    if (length > Lmax) Lmax = length;
+    double& U1t = u_tt_sort[idx_s0];
+    double& U2t = u_tt_sort[idx_s1];
+    double& U1s = u_ts_sort[idx_s0];
+    double& U2s = u_ts_sort[idx_s1];
+    double* F1 = &(f_sort[3*idx_s0]);
+    double* F2 = &(f_sort[3*idx_s1]);
+    double* S1 = &(s_sort[9*idx_s0]);
+    double* S2 = &(s_sort[9*idx_s1]);
+    double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12;
+    if (std::abs(R12 - RT) > 1e-3)
+        error->all(FLERR,"Inconsistent input and potential table");
+    //assume that the length of the segment is defined by the node with
+    //smallest global id
+    double L12 = (g_id[s[0]] > g_id[s[1]]) ? l[s[1]] : l[s[0]];
+    mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2,
+     R12, L12);
+
+    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){
+      //id of the beginning and end of the chain in the sorted representation
+      const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
+      int N = chain[1] - chain[0] + 1;  //number of elements in the chain
+      int end1 = ntlist.get_idx(chain[0]);  //chain ends (real representation)
+      int end2 = ntlist.get_idx(chain[1]);
+      double* X = &(x_sort[3*chain[0]]);
+      double* Ut = &(u_tt_sort[chain[0]]);
+      double* F = &(f_sort[3*chain[0]]);
+      double* S = &(s_sort[9*chain[0]]);
+      double R = r[end1];
+      int* BBF = &(b_sort[chain[0]]);
+      int E1 = ntlist.is_end(end1);
+      int E2 = ntlist.is_end(end2);
+
+      int Ee = 0;  // 0: no extra end node, 1: one found beyond end1, 2: one found beyond end2
+      double* Xe = X; double* Fe = F; double* Se = S;
+      if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end &&
+       ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] ==
+       MESONTList::cnt_end){
+        Ee = 1;
+        int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]);
+        Xe = &(x_sort[3*idx]);
+        Fe = &(f_sort[3*idx]);
+        Se = &(s_sort[9*idx]);
+      }
+      else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end &&
+       ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] ==
+       MESONTList::cnt_end){
+        Ee = 2;
+        int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]);
+        Xe = &(x_sort[3*idx]);
+        Fe = &(f_sort[3*idx]);
+        Se = &(s_sort[9*idx]);
+      }
+
+      mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S,
+       Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType);
+    }
+  }
+
+  //check if cutoff is chosen correctly (now with measured Lmax and Rmax)
+  Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
+   std::pow((2.0*Rmax + TPBRcutoff),2)));
+  if (cut_global < Rcut_min){
+    std::stringstream err;
+    err << "The selected cutoff is too small for the current system : " <<
+     "L_max = " << Lmax << ", R_max = " << Rmax << ", Rc = " << cut_global <<
+     ", Rcut_min = " << Rcut_min;
+    error->all(FLERR, err.str().c_str());
+  }
+
+  // set per atom values and accumulators
+  // reallocate per-atom arrays if necessary
+  if (eatom_s == nullptr)
+   memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
+  if (eatom_b == nullptr)
+   memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
+  if (eatom_t == nullptr)
+   memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
+  if (atom->nmax > maxeatom) {
+    maxeatom = atom->nmax;
+    memory->destroy(eatom);
+    memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom");
+    memory->destroy(eatom_s);
+    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
+    memory->destroy(eatom_b);
+    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
+    memory->destroy(eatom_t);
+    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
+  }
+
+  if (atom->nmax > maxvatom) {
+    maxvatom = atom->nmax;
+    memory->destroy(vatom);
+    memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom");
+  }
+
+  // zero accumulators
+  eng_vdwl = 0.0; energy_s = 0.0;
+  energy_b = 0.0; energy_t = 0.0;
+  for (int i = 0; i < 6; i++) virial[i] = 0.0;
+  for (int i = 0; i < ntot; i++){
+    eatom[i] = 0.0; eatom_s[i] = 0.0;
+    eatom_b[i] = 0.0; eatom_t[i] = 0.0;
+  }
+  for (int i = 0; i < ntot; i++)
+    for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
+
+  //convert from sorted representation
+  for (int i = 0; i < nall; i++){
+    int idx = ntlist.get_idx(i);
+    for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
+    eatom_s[idx] = u_ts_sort[i];
+    eatom_b[idx] = u_tb_sort[i];
+    eatom_t[idx] = u_tt_sort[i];
+    eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
+    energy_s += u_ts_sort[i];
+    energy_b += u_tb_sort[i];
+    energy_t += u_tt_sort[i];
+    vatom[idx][0] = s_sort[9*i+0]; //xx
+    vatom[idx][1] = s_sort[9*i+4]; //yy
+    vatom[idx][2] = s_sort[9*i+8]; //zz
+    vatom[idx][3] = s_sort[9*i+1]; //xy
+    vatom[idx][4] = s_sort[9*i+2]; //xz
+    vatom[idx][5] = s_sort[9*i+5]; //yz
+    for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j];
+    buckling[idx] = b_sort[i];
+  }
+  eng_vdwl = energy_s + energy_b + energy_t;
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::allocate(){
+  allocated = 1;  // checked by settings(), coeff() and the destructor
+  int n = atom->ntypes;
+
+  memory->create(setflag,n+1,n+1,"pair:setflag");  // sized n+1 because the loops index types from 1
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;  // only entries with j >= i are cleared
+
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+  memory->create(cut,n+1,n+1,"pair:cut");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::settings(int narg, char **arg){  // args: cutoff [table file [BendingMode [TPMType]]]
+  if ((narg == 0) || (narg > 4))
+    error->all(FLERR,"Illegal pair_style command");
+  cut_global = utils::numeric(FLERR,arg[0],false,lmp);
+
+  // reset cutoffs that have been explicitly set
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i+1; j <= atom->ntypes; j++)
+        cut[i][j] = cut_global;
+  }
+  std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs";  // default table file name
+  tab_path_length = TPMAFile.length();
+  if (tab_path != nullptr) memory->destroy(tab_path);
+  //c_str returns '\0' terminated string
+  memory->create(tab_path,tab_path_length+1,"pair:path");
+  std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1);
+  mesont_lib_SetTablePath(tab_path, tab_path_length);
+
+  if (narg > 2) {
+    BendingMode = utils::numeric(FLERR,arg[2],false,lmp);
+    if ((BendingMode < 0) || (BendingMode > 1))
+      error->all(FLERR,"Incorrect BendingMode");
+  }
+  if (narg > 3) {
+    TPMType = utils::numeric(FLERR,arg[3],false,lmp);
+    if ((TPMType < 0) || (TPMType > 1))
+      error->all(FLERR,"Incorrect TPMType");
+  }
+
+  mesont_lib_TPBInit();
+  int M, N;  // table dimensions read from the fourth line of the table file
+  std::ifstream in(TPMAFile);
+  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
+  std::string tmp;
+  std::getline(in,tmp);  // skip the first three lines
+  std::getline(in,tmp);
+  std::getline(in,tmp);
+  if (!(in >> M >> N)) error->all(FLERR,"Incorrect table file format");  // never pass unread M/N on
+  in.close();
+  mesont_lib_TPMInit(M, N);
+  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::coeff(int narg, char **arg){  // args: type range i, type range j, optional cutoff
+  if ((narg < 2) || (narg > 3))
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
+  utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
+
+  double cut_one = cut_global;  // default when no per-pair cutoff is given
+  if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {  // only pairs with j >= i are set
+      cut[i][j] = cut_one;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");  // the ranges selected no pair
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairMESONTTPM::init_one(int i, int j){  // returns the cutoff stored for type pair (i,j)
+  if (setflag[i][j] == 0) {  // pair was not set explicitly: mix the two same-type cutoffs
+    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
+  }
+
+  return cut[i][j];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_restart(FILE *fp){
+  write_restart_settings(fp);  // global settings come first, mirrored by read_restart()
+
+  int i,j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {  // the cutoff is stored only for pairs that were set
+        fwrite(&cut[i][j],sizeof(double),1,fp);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::read_restart(FILE *fp){
+  read_restart_settings(fp);  // same order as write_restart()
+  allocate();
+
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);  // only rank 0 reads; fread result is not checked
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          fread(&cut[i][j],sizeof(double),1,fp);
+        }
+        MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_restart_settings(FILE *fp){  // field order must match read_restart_settings()
+  fwrite(&BendingMode,sizeof(int),1,fp);
+  fwrite(&TPMType,sizeof(int),1,fp);
+  fwrite(&cut_global,sizeof(double),1,fp);
+  fwrite(&tab_path_length,sizeof(int),1,fp);
+  fwrite(tab_path,tab_path_length+1,1,fp);  // +1 writes the terminating '\0' as well
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::read_restart_settings(FILE *fp){  // field order must match write_restart_settings()
+  int me = comm->me;
+  if (me == 0) {  // only rank 0 reads; values are broadcast below
+    fread(&BendingMode,sizeof(int),1,fp);
+    fread(&TPMType,sizeof(int),1,fp);
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&tab_path_length,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&BendingMode,1,MPI_INT,0,world);
+  MPI_Bcast(&TPMType,1,MPI_INT,0,world);
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&tab_path_length,1,MPI_INT,0,world);
+
+  if (tab_path != nullptr) memory->destroy(tab_path);
+  memory->create(tab_path,tab_path_length+1,"pair:path");  // +1 for the stored '\0'
+  if (me == 0) fread(tab_path,tab_path_length+1,1,fp);
+  MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world);
+  mesont_lib_SetTablePath(tab_path,tab_path_length);
+  mesont_lib_TPBInit();
+  int M, N;  // table dimensions read from the fourth line of the table file
+  std::ifstream in(tab_path);
+  if (!in.is_open()) error->all(FLERR,"Incorrect table path");
+  std::string tmp;
+  std::getline(in,tmp);  // skip the first three lines
+  std::getline(in,tmp);
+  std::getline(in,tmp);
+  if (!(in >> M >> N)) error->all(FLERR,"Incorrect table file format");  // never pass unread M/N on
+  in.close();
+  mesont_lib_TPMInit(M, N);
+  mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R());
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_data(FILE *fp){
+  for (int i = 1; i <= atom->ntypes; i++)
+    fprintf(fp,"%d\n",i);  // one line per atom type holding only the type index
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes all pairs to data file
+------------------------------------------------------------------------- */
+
+void PairMESONTTPM::write_data_all(FILE *fp){
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++)  // pairs with j >= i only
+      fprintf(fp,"%d %d %g\n",i,j,cut[i][j]);  // columns: type i, type j, cutoff
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMESONTTPM::init_style(){
+  //make sure that a full list is created (including ghost nodes)
+  int r = neighbor->request(this,instance_me);  // r indexes the new entry in neighbor->requests
+  neighbor->requests[r]->half = false;
+  neighbor->requests[r]->full = true;
+  neighbor->requests[r]->ghost = true;  // compute() reads list->gnum, so ghost entries are needed
+}
+
+void* PairMESONTTPM::extract(const char *str, int &){  // the int& argument is unnamed and never written
+  if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s;  // address of the scalar total
+  else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b;
+  else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t;
+  else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s;  // per-atom array; nullptr until compute() creates it
+  else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b;
+  else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t;
+  else return nullptr;  // unknown keyword
+};
diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h
index c3d71ae953..704556d75e 100644
--- a/src/USER-MESONT/pair_mesont_tpm.h
+++ b/src/USER-MESONT/pair_mesont_tpm.h
@@ -1,99 +1,98 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-
-   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
-------------------------------------------------------------------------- */
-
-#ifdef PAIR_CLASS
-
-PairStyle(mesont/tpm,PairMESONTTPM)
-
-#else
-
-#ifndef LMP_PAIR_MESONT_TPM_H
-#define LMP_PAIR_MESONT_TPM_H
-
-#include "pair.h"
-
-namespace LAMMPS_NS {
-
-class PairMESONTTPM : public Pair {
- public:
-  PairMESONTTPM(class LAMMPS *);
-  virtual ~PairMESONTTPM();
-  virtual void compute(int, int);
-  void settings(int, char **);
-  void coeff(int, char **);
-  double init_one(int, int);
-  void write_restart(FILE *);
-  void read_restart(FILE *);
-  void write_restart_settings(FILE *);
-  void read_restart_settings(FILE *);
-  void write_data(FILE *);
-  void write_data_all(FILE *);
-  virtual void init_style();
-
-  double energy_s;  // accumulated energies for stretching
-  double energy_b;  // accumulated energies for bending
-  double energy_t;  // accumulated energies for tube-tube interaction
-  double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values
-
- protected:
-  int BendingMode, TPMType;
-  char* tab_path;
-  int tab_path_length;
-  double cut_global;
-  double **cut;
-  static int instance_count;
-  int nmax;
-
-  virtual void allocate();
-  virtual void *extract(const char *, int &);
-};
-
-}
-
-#endif
-#endif
-
-/* ERROR/WARNING messages:
-
-E: Pair style mesont/tpm requires newton pair on
-
-newton_pair must be set to on
-
-E: The selected cutoff is too small for the current system
-
-cutoff must be increased.
-
-E: Illegal pair_style command
-
-Incorrect argument list in the style init.
-
-E: Incorrect table path
-
-Incorrect path to the table files.
-
-E: Incorrect BendingMode
-
-Self-explanatory.
-
-E: Incorrect TPMType
-
-Self-explanatory.
-
-E: Inconsistent input and potential table
-
-The tube diameter is inconsistent with the chirality specified
-during generation of the potential table.
-
-*/
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(mesont/tpm,PairMESONTTPM)
+
+#else
+
+#ifndef LMP_PAIR_MESONT_TPM_H
+#define LMP_PAIR_MESONT_TPM_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairMESONTTPM : public Pair {
+ public:
+  PairMESONTTPM(class LAMMPS *);
+  virtual ~PairMESONTTPM();
+  virtual void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  void write_data(FILE *);
+  void write_data_all(FILE *);
+  virtual void init_style();
+
+  double energy_s;  // accumulated energies for stretching
+  double energy_b;  // accumulated energies for bending
+  double energy_t;  // accumulated energies for tube-tube interaction
+  double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values
+
+ protected:
+  int BendingMode, TPMType;
+  char* tab_path;
+  int tab_path_length;
+  double cut_global;
+  double **cut;
+  static int instance_count;
+
+  virtual void allocate();
+  virtual void *extract(const char *, int &);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Pair style mesont/tpm requires newton pair on
+
+newton_pair must be set to on
+
+E: The selected cutoff is too small for the current system
+
+cutoff must be increased.
+
+E: Illegal pair_style command
+
+Incorrect argument list in the style init.
+
+E: Incorrect table path
+
+Incorrect path to the table files.
+
+E: Incorrect BendingMode
+
+Self-explanatory.
+
+E: Incorrect TPMType
+
+Self-explanatory.
+
+E: Inconsistent input and potential table
+
+The tube diameter is inconsistent with the chirality specified
+during generation of the potential table.
+
+*/

From 62c7aca26fb5eab910382bcefdff0ea66f38af94 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 16:35:50 -0500
Subject: [PATCH 22/44] fix bug with memory allocation

fix bug with eatom_s, eatom_b, eatom_t allocation
---
 src/USER-MESONT/pair_mesont_tpm.cpp | 115 +++++++++++++++-------------
 src/USER-MESONT/pair_mesont_tpm.h   |   1 +
 2 files changed, 61 insertions(+), 55 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index 9185786341..f341a73e23 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -311,6 +311,7 @@ PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
   eatom_s = nullptr;
   eatom_b = nullptr;
   eatom_t = nullptr;
+  nmax = 0;
   instance_count++;
   if(instance_count > 1) error->all(FLERR,
    "only a single instance of mesont/tpm pair style can be created");
@@ -336,7 +337,17 @@ PairMESONTTPM::~PairMESONTTPM()
 /* ---------------------------------------------------------------------- */
 
 void PairMESONTTPM::compute(int eflag, int vflag){
+  // set per atom values and accumulators
+  // reallocate per-atom arrays if necessary
   ev_init(eflag,vflag);
+  if (atom->nmax > nmax) {
+    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
+    memory->destroy(eatom_b);
+    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
+    memory->destroy(eatom_t);
+    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
+    nmax = atom->nmax;
+  }
   //total number of atoms in the node and ghost shell
   int nall = list->inum + list->gnum;
   int ntot = atom->nlocal + atom->nghost;
@@ -508,64 +519,58 @@ void PairMESONTTPM::compute(int eflag, int vflag){
     error->all(FLERR, err.str().c_str());
   }
 
-  // set per atom values and accumulators
-  // reallocate per-atom arrays if necessary
-  if (eatom_s == nullptr)
-   memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
-  if (eatom_b == nullptr)
-   memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
-  if (eatom_t == nullptr)
-   memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
-  if (atom->nmax > maxeatom) {
-    maxeatom = atom->nmax;
-    memory->destroy(eatom);
-    memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom");
-    memory->destroy(eatom_s);
-    memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
-    memory->destroy(eatom_b);
-    memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");
-    memory->destroy(eatom_t);
-    memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
-  }
-
-  if (atom->nmax > maxvatom) {
-    maxvatom = atom->nmax;
-    memory->destroy(vatom);
-    memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom");
-  }
-
-  // zero accumulators
-  eng_vdwl = 0.0; energy_s = 0.0;
-  energy_b = 0.0; energy_t = 0.0;
-  for (int i = 0; i < 6; i++) virial[i] = 0.0;
-  for (int i = 0; i < ntot; i++){
-    eatom[i] = 0.0; eatom_s[i] = 0.0;
-    eatom_b[i] = 0.0; eatom_t[i] = 0.0;
-  }
-  for (int i = 0; i < ntot; i++)
-    for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
-
   //convert from sorted representation
   for (int i = 0; i < nall; i++){
-    int idx = ntlist.get_idx(i);
-    for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
-    eatom_s[idx] = u_ts_sort[i];
-    eatom_b[idx] = u_tb_sort[i];
-    eatom_t[idx] = u_tt_sort[i];
-    eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
-    energy_s += u_ts_sort[i];
-    energy_b += u_tb_sort[i];
-    energy_t += u_tt_sort[i];
-    vatom[idx][0] = s_sort[9*i+0]; //xx
-    vatom[idx][1] = s_sort[9*i+4]; //yy
-    vatom[idx][2] = s_sort[9*i+8]; //zz
-    vatom[idx][3] = s_sort[9*i+1]; //xy
-    vatom[idx][4] = s_sort[9*i+2]; //xz
-    vatom[idx][5] = s_sort[9*i+5]; //yz
-    for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j];
-    buckling[idx] = b_sort[i];
+      int idx = ntlist.get_idx(i);
+      for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
+      buckling[idx] = b_sort[i];
   }
-  eng_vdwl = energy_s + energy_b + energy_t;
+  if(eflag){
+    eng_vdwl = 0.0; energy_s = 0.0;
+    energy_b = 0.0; energy_t = 0.0;
+    for (int i = 0; i < ntot; i++){
+      eatom[i] = 0.0; eatom_s[i] = 0.0;
+      eatom_b[i] = 0.0; eatom_t[i] = 0.0;
+    }
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      eatom_s[idx] = u_ts_sort[i];
+      eatom_b[idx] = u_tb_sort[i];
+      eatom_t[idx] = u_tt_sort[i];
+      eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
+      energy_s += u_ts_sort[i];
+      energy_b += u_tb_sort[i];
+      energy_t += u_tt_sort[i];
+    }
+    eng_vdwl = energy_s + energy_b + energy_t;
+  }
+  if(vflag){
+    for (int i = 0; i < 6; i++) virial[i] = 0.0;
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      virial[0] += s_sort[9*i+0]; //xx
+      virial[1] += s_sort[9*i+4]; //yy
+      virial[2] += s_sort[9*i+8]; //zz
+      virial[3] += s_sort[9*i+1]; //xy
+      virial[4] += s_sort[9*i+2]; //xz
+      virial[5] += s_sort[9*i+5]; //yz
+    }
+  }
+  int vflag_atom = vflag & 4;
+  if(vflag_atom){
+    for (int i = 0; i < ntot; i++)
+      for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      vatom[idx][0] = s_sort[9*i+0]; //xx
+      vatom[idx][1] = s_sort[9*i+4]; //yy
+      vatom[idx][2] = s_sort[9*i+8]; //zz
+      vatom[idx][3] = s_sort[9*i+1]; //xy
+      vatom[idx][4] = s_sort[9*i+2]; //xz
+      vatom[idx][5] = s_sort[9*i+5]; //yz
+    }
+  }
+
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h
index 704556d75e..a18e555349 100644
--- a/src/USER-MESONT/pair_mesont_tpm.h
+++ b/src/USER-MESONT/pair_mesont_tpm.h
@@ -54,6 +54,7 @@ class PairMESONTTPM : public Pair {
   double cut_global;
   double **cut;
   static int instance_count;
+  int nmax;
 
   virtual void allocate();
   virtual void *extract(const char *, int &);

From 559d6b10cfb93f3a59b0318b991b70ed092db429 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 16:39:13 -0500
Subject: [PATCH 23/44] fix bug with memory allocation: free eatom_s before reallocating it

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index f341a73e23..720a821aa6 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -341,6 +341,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   // reallocate per-atom arrays if necessary
   ev_init(eflag,vflag);
   if (atom->nmax > nmax) {
+    memory->destroy(eatom_s);
     memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
     memory->destroy(eatom_b);
     memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b");

From aff54e948a5890c96864cc2d5a31b318f1909961 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 18:39:34 -0500
Subject: [PATCH 24/44] eflag fix

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 32 +++++++++++++++++------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index 720a821aa6..08b43b122f 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -49,7 +49,7 @@ private:
 
 class MESONTList {
 public:
-  MESONTList(const Atom* atom, const NeighList* nblist, double rc2);
+  MESONTList(const Atom* atom, const NeighList* nblist);
   ~MESONTList() {};
   //list of segments
   const std::vector<array2003<int,2> >& get_segments() const;
@@ -165,12 +165,13 @@ void vector_union(std::vector<T>& v1, std::vector<T>& v2,
   }
 }
 
-MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){
+MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
   if (atom == nullptr || nblist == nullptr) return;
   //number of local atoms at the node
   int nlocal = atom->nlocal;
-  //total number of atoms in the node and ghost shell
+  //total number of atoms in the node and ghost shell treated as NTs
   int nall = nblist->inum + nblist->gnum;
+  //total number of atoms in the node and ghost shell
   int ntot = atom->nlocal + atom->nghost;
   tagint* const g_id = atom->tag;
   tagint** const bonds = atom->bond_nt;
@@ -340,7 +341,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   // set per atom values and accumulators
   // reallocate per-atom arrays if necessary
   ev_init(eflag,vflag);
-  if (atom->nmax > nmax) {
+  if (atom->nmax > nmax && eflag_atom) {
     memory->destroy(eatom_s);
     memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s");
     memory->destroy(eatom_b);
@@ -349,8 +350,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){
     memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t");
     nmax = atom->nmax;
   }
-  //total number of atoms in the node and ghost shell
+  //total number of atoms in the node and ghost shell treated as NTs
   int nall = list->inum + list->gnum;
+  //total number of atoms in the node and ghost shell
   int ntot = atom->nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
   if(!newton_pair)
@@ -381,7 +383,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   }
 
   //generate bonds and chain nblist
-  MESONTList ntlist(atom, list, cut_global*cut_global);
+  MESONTList ntlist(atom, list);
 
   //reorder data to make it contiguous within tubes
   //and compatible with Fortran functions
@@ -526,9 +528,18 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
       buckling[idx] = b_sort[i];
   }
-  if(eflag){
+  if(eflag_global){
     eng_vdwl = 0.0; energy_s = 0.0;
     energy_b = 0.0; energy_t = 0.0;
+    for (int i = 0; i < nall; i++){
+      int idx = ntlist.get_idx(i);
+      energy_s += u_ts_sort[i];
+      energy_b += u_tb_sort[i];
+      energy_t += u_tt_sort[i];
+    }
+    eng_vdwl = energy_s + energy_b + energy_t;
+  }
+  if(eflag_atom){
     for (int i = 0; i < ntot; i++){
       eatom[i] = 0.0; eatom_s[i] = 0.0;
       eatom_b[i] = 0.0; eatom_t[i] = 0.0;
@@ -539,13 +550,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       eatom_b[idx] = u_tb_sort[i];
       eatom_t[idx] = u_tt_sort[i];
       eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
-      energy_s += u_ts_sort[i];
-      energy_b += u_tb_sort[i];
-      energy_t += u_tt_sort[i];
     }
-    eng_vdwl = energy_s + energy_b + energy_t;
   }
-  if(vflag){
+  if(vflag_global){
     for (int i = 0; i < 6; i++) virial[i] = 0.0;
     for (int i = 0; i < nall; i++){
       int idx = ntlist.get_idx(i);
@@ -557,7 +564,6 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       virial[5] += s_sort[9*i+5]; //yz
     }
   }
-  int vflag_atom = vflag & 4;
   if(vflag_atom){
     for (int i = 0; i < ntot; i++)
       for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;

From 4d19b8bf3ad955837740ce7901510c3034a881f2 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 19:38:56 -0500
Subject: [PATCH 25/44] style adjustment

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 82 ++++++++++++++---------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index 08b43b122f..2fe5b2036f 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -40,7 +40,7 @@ using namespace LAMMPS_NS;
 template<typename T, int N>
 class array2003{
 public:
-  T& operator[] (int idx){ return data[idx];};
+  T& operator[] (int idx) { return data[idx];};
   const T& operator[] (int idx) const{ return data[idx];};
 private:
   T data[N];
@@ -165,7 +165,7 @@ void vector_union(std::vector<T>& v1, std::vector<T>& v2,
   }
 }
 
-MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
+MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) {
   if (atom == nullptr || nblist == nullptr) return;
   //number of local atoms at the node
   int nlocal = atom->nlocal;
@@ -194,7 +194,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
       if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end;
     for (int j = 0; j < nnb; j++) {
       int nb = nblist->firstneigh[i][j];
-      if (bonds[i][0] == g_id[nb]){
+      if (bonds[i][0] == g_id[nb]) {
         chain_list[i][0] = nb;
         chain_list[nb][1] = i;
         break;
@@ -224,13 +224,13 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
   for (int i = 0; i < nlocal; i++) {
     if (chain_list[i][0] == not_cnt) continue;
     if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     g_id[i] < g_id[chain_list[i][0]]){
+     g_id[i] < g_id[chain_list[i][0]]) {
       array2003<int, 2> tmp_c;
       tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
       segments.push_back(tmp_c);
     }
     if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
-     g_id[i] < g_id[chain_list[i][1]]){
+     g_id[i] < g_id[chain_list[i][1]]) {
       array2003<int, 2> tmp_c;
        tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
        segments.push_back(tmp_c);
@@ -239,7 +239,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
   int nbonds = segments.size();
 
   //triplets
-  for (int i = 0; i < nlocal; i++){
+  for (int i = 0; i < nlocal; i++) {
     if (chain_list[i][0] == not_cnt) continue;
     if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
      chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end)
@@ -286,7 +286,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){
            chain_list[idx0][0] != domain_end) chain[0] -= 1;
           if (chain_list[idx1][1] != cnt_end &&
            chain_list[idx1][1] != domain_end) chain[1] += 1;
-          if(chain[0] != chain[1]) nb_chains[i].push_back(chain);
+          if (chain[0] != chain[1]) nb_chains[i].push_back(chain);
           idx_s = (j == nnb - 1) ? -1 : nb_list[j + 1];
         }
       }
@@ -314,7 +314,7 @@ PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) {
   eatom_t = nullptr;
   nmax = 0;
   instance_count++;
-  if(instance_count > 1) error->all(FLERR,
+  if (instance_count > 1) error->all(FLERR,
    "only a single instance of mesont/tpm pair style can be created");
 }
 
@@ -337,7 +337,7 @@ PairMESONTTPM::~PairMESONTTPM()
 
 /* ---------------------------------------------------------------------- */
 
-void PairMESONTTPM::compute(int eflag, int vflag){
+void PairMESONTTPM::compute(int eflag, int vflag) {
   // set per atom values and accumulators
   // reallocate per-atom arrays if necessary
   ev_init(eflag,vflag);
@@ -355,7 +355,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   //total number of atoms in the node and ghost shell
   int ntot = atom->nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
-  if(!newton_pair)
+  if (!newton_pair)
    error->all(FLERR,"Pair style mesont/tpm requires newton pair on");
 
   double **x = atom->x;
@@ -374,7 +374,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   }
   double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
    std::pow((2.0*RT + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
+  if (cut_global < Rcut_min) {
     std::stringstream err;
     err << "The selected cutoff is too small for the current system : " <<
      "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
@@ -390,7 +390,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   std::vector<double> x_sort(3*nall), f_sort(3*nall), s_sort(9*nall);
   std::vector<double> u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall);
   std::vector<int> b_sort(nall);
-  for (int i = 0; i < nall; i++){
+  for (int i = 0; i < nall; i++) {
     int idx = ntlist.get_idx(i);
     for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j];
     b_sort[i] = buckling[idx];
@@ -426,13 +426,13 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   }
 
   //share new values of buckling
-  if (BendingMode == 1){
-    for (int i = 0; i < nall; i++){
+  if (BendingMode == 1) {
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       buckling[idx] = b_sort[i];
     }
     comm->forward_comm_pair(this);
-    for (int i = 0; i < nall; i++){
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       b_sort[i] = buckling[idx];
     }
@@ -470,7 +470,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
     mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2,
      R12, L12);
 
-    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){
+    for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++) {
       //id of the beginning and end of the chain in the sorted representation
       const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
       int N = chain[1] - chain[0] + 1;  //number of elements in the chain
@@ -489,7 +489,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       double* Xe = X; double* Fe = F; double* Se = S;
       if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end &&
        ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] ==
-       MESONTList::cnt_end){
+       MESONTList::cnt_end) {
         Ee = 1;
         int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]);
         Xe = &(x_sort[3*idx]);
@@ -498,7 +498,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       }
       else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end &&
        ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] ==
-       MESONTList::cnt_end){
+       MESONTList::cnt_end) {
         Ee = 2;
         int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]);
         Xe = &(x_sort[3*idx]);
@@ -514,7 +514,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   //check if cutoff is chosen correctly
   Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax +
    std::pow((2.0*Rmax + TPBRcutoff),2)));
-  if (cut_global < Rcut_min){
+  if (cut_global < Rcut_min) {
     std::stringstream err;
     err << "The selected cutoff is too small for the current system : " <<
      "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global <<
@@ -523,15 +523,15 @@ void PairMESONTTPM::compute(int eflag, int vflag){
   }
 
   //convert from sorted representation
-  for (int i = 0; i < nall; i++){
+  for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j];
       buckling[idx] = b_sort[i];
   }
-  if(eflag_global){
+  if (eflag_global) {
     eng_vdwl = 0.0; energy_s = 0.0;
     energy_b = 0.0; energy_t = 0.0;
-    for (int i = 0; i < nall; i++){
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       energy_s += u_ts_sort[i];
       energy_b += u_tb_sort[i];
@@ -539,12 +539,12 @@ void PairMESONTTPM::compute(int eflag, int vflag){
     }
     eng_vdwl = energy_s + energy_b + energy_t;
   }
-  if(eflag_atom){
-    for (int i = 0; i < ntot; i++){
+  if (eflag_atom) {
+    for (int i = 0; i < ntot; i++) {
       eatom[i] = 0.0; eatom_s[i] = 0.0;
       eatom_b[i] = 0.0; eatom_t[i] = 0.0;
     }
-    for (int i = 0; i < nall; i++){
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       eatom_s[idx] = u_ts_sort[i];
       eatom_b[idx] = u_tb_sort[i];
@@ -552,9 +552,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i];
     }
   }
-  if(vflag_global){
+  if (vflag_global) {
     for (int i = 0; i < 6; i++) virial[i] = 0.0;
-    for (int i = 0; i < nall; i++){
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       virial[0] += s_sort[9*i+0]; //xx
       virial[1] += s_sort[9*i+4]; //yy
@@ -564,10 +564,10 @@ void PairMESONTTPM::compute(int eflag, int vflag){
       virial[5] += s_sort[9*i+5]; //yz
     }
   }
-  if(vflag_atom){
+  if (vflag_atom) {
     for (int i = 0; i < ntot; i++)
       for (int j = 0; j < 6; j++) vatom[i][j] = 0.0;
-    for (int i = 0; i < nall; i++){
+    for (int i = 0; i < nall; i++) {
       int idx = ntlist.get_idx(i);
       vatom[idx][0] = s_sort[9*i+0]; //xx
       vatom[idx][1] = s_sort[9*i+4]; //yy
@@ -584,7 +584,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){
    allocate all arrays
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::allocate(){
+void PairMESONTTPM::allocate() {
   allocated = 1;
   int n = atom->ntypes;
 
@@ -601,7 +601,7 @@ void PairMESONTTPM::allocate(){
    global settings
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::settings(int narg, char **arg){
+void PairMESONTTPM::settings(int narg, char **arg) {
   if ((narg == 0) || (narg > 4))
     error->all(FLERR,"Illegal pair_style command");
   cut_global = utils::numeric(FLERR,arg[0],false,lmp);
@@ -650,7 +650,7 @@ void PairMESONTTPM::settings(int narg, char **arg){
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::coeff(int narg, char **arg){
+void PairMESONTTPM::coeff(int narg, char **arg) {
   if ((narg < 2) || (narg > 3))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
@@ -679,7 +679,7 @@ void PairMESONTTPM::coeff(int narg, char **arg){
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
-double PairMESONTTPM::init_one(int i, int j){
+double PairMESONTTPM::init_one(int i, int j) {
   if (setflag[i][j] == 0) {
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
@@ -691,7 +691,7 @@ double PairMESONTTPM::init_one(int i, int j){
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::write_restart(FILE *fp){
+void PairMESONTTPM::write_restart(FILE *fp) {
   write_restart_settings(fp);
 
   int i,j;
@@ -708,7 +708,7 @@ void PairMESONTTPM::write_restart(FILE *fp){
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::read_restart(FILE *fp){
+void PairMESONTTPM::read_restart(FILE *fp) {
   read_restart_settings(fp);
   allocate();
 
@@ -731,7 +731,7 @@ void PairMESONTTPM::read_restart(FILE *fp){
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::write_restart_settings(FILE *fp){
+void PairMESONTTPM::write_restart_settings(FILE *fp) {
   fwrite(&BendingMode,sizeof(int),1,fp);
   fwrite(&TPMType,sizeof(int),1,fp);
   fwrite(&cut_global,sizeof(double),1,fp);
@@ -743,7 +743,7 @@ void PairMESONTTPM::write_restart_settings(FILE *fp){
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::read_restart_settings(FILE *fp){
+void PairMESONTTPM::read_restart_settings(FILE *fp) {
   int me = comm->me;
   if (me == 0) {
     fread(&BendingMode,sizeof(int),1,fp);
@@ -779,7 +779,7 @@ void PairMESONTTPM::read_restart_settings(FILE *fp){
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::write_data(FILE *fp){
+void PairMESONTTPM::write_data(FILE *fp) {
   for (int i = 1; i <= atom->ntypes; i++)
     fprintf(fp,"%d\n",i);
 }
@@ -788,7 +788,7 @@ void PairMESONTTPM::write_data(FILE *fp){
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
-void PairMESONTTPM::write_data_all(FILE *fp){
+void PairMESONTTPM::write_data_all(FILE *fp) {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
       fprintf(fp,"%d %d %g\n",i,j,cut[i][j]);
@@ -796,7 +796,7 @@ void PairMESONTTPM::write_data_all(FILE *fp){
 
 /* ---------------------------------------------------------------------- */
 
-void PairMESONTTPM::init_style(){
+void PairMESONTTPM::init_style() {
   //make sure that a full list is created (including ghost nodes)
   int r = neighbor->request(this,instance_me);
   neighbor->requests[r]->half = false;
@@ -804,7 +804,7 @@ void PairMESONTTPM::init_style(){
   neighbor->requests[r]->ghost = true;
 }
 
-void* PairMESONTTPM::extract(const char *str, int &){
+void* PairMESONTTPM::extract(const char *str, int &) {
   if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s;
   else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b;
   else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t;

From 3e7df13203a7cb39712930f114bb8f8de413b2d1 Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 21:22:14 -0500
Subject: [PATCH 26/44] c++11

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 70 +++++++++++------------------
 1 file changed, 26 insertions(+), 44 deletions(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index 2fe5b2036f..b92fc16750 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -9,7 +9,7 @@
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
-
+ 2
    Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
 ------------------------------------------------------------------------- */
 
@@ -29,6 +29,7 @@
 #include <cstring>
 #include <vector>
 #include <cmath>
+#include <array>
 
 #include <fstream>
 #include <sstream>
@@ -36,28 +37,17 @@
 
 using namespace LAMMPS_NS;
 
-//since LAMMPS is compiled with C++ 2003, define a substitution for std::array
-template<typename T, int N>
-class array2003{
-public:
-  T& operator[] (int idx) { return data[idx];};
-  const T& operator[] (int idx) const{ return data[idx];};
-private:
-  T data[N];
-};
-
-
 class MESONTList {
 public:
   MESONTList(const Atom* atom, const NeighList* nblist);
   ~MESONTList() {};
   //list of segments
-  const std::vector<array2003<int,2> >& get_segments() const;
+  const std::vector<std::array<int,2>>& get_segments() const;
   //list of triplets
-  const std::vector<array2003<int,3> >& get_triplets() const;
+  const std::vector<std::array<int,3>>& get_triplets() const;
   //list of neighbor chains [start,end] for segments
   //(use idx() to get real indexes)
-  const std::vector<std::vector<array2003<int,2> > >& get_nbs() const;
+  const std::vector<std::vector<std::array<int,2>>>& get_nbs() const;
   //convert idx from sorted representation to real idx
   int get_idx(int idx) const;
   //return list of indexes for conversion from sorted representation
@@ -69,22 +59,22 @@ public:
   //check if the node is the end of the tube
   bool is_end(int idx) const;
 
-  array2003<int, 2> get_segment(int idx) const;
-  array2003<int, 3> get_triplet(int idx) const;
+  std::array<int,2> get_segment(int idx) const;
+  std::array<int,3> get_triplet(int idx) const;
 
   static const int cnt_end = -1;
   static const int domain_end = -2;
   static const int not_cnt = -3;
 private:
-  std::vector<array2003<int, 2> > chain_list, segments;
-  std::vector<array2003<int, 3> > triplets;
-  std::vector<std::vector<array2003<int, 2> > > nb_chains;
+  std::vector<std::array<int,2>> chain_list, segments;
+  std::vector<std::array<int,3>> triplets;
+  std::vector<std::vector<std::array<int,2>>> nb_chains;
   std::vector<int> index_list, index_list_b;
 };
 
 //=============================================================================
 
-inline const std::vector<std::vector<array2003<int, 2> > > &
+inline const std::vector<std::vector<std::array<int,2>>> &
  MESONTList::get_nbs() const {
   return nb_chains;
 }
@@ -106,25 +96,25 @@ inline const std::vector<int>& MESONTList::get_idxb_list() const {
   return index_list_b;
 };
 
-inline const std::vector<array2003<int, 2> > & MESONTList::get_segments()
+inline const std::vector<std::array<int,2>> & MESONTList::get_segments()
  const {
   return segments;
 }
 
-inline const std::vector<array2003<int, 3> > & MESONTList::get_triplets()
+inline const std::vector<std::array<int,3>> & MESONTList::get_triplets()
  const {
   return triplets;
 }
 
-inline array2003<int, 2> MESONTList::get_segment(int idx) const {
-  array2003<int, 2> result;
+inline std::array<int,2> MESONTList::get_segment(int idx) const {
+  std::array<int,2> result;
   result[0] = chain_list[idx][0];
   result[1] = idx;
   return result;
 }
 
-inline array2003<int, 3> MESONTList::get_triplet(int idx) const {
-  array2003<int, 3> result;
+inline std::array<int,3> MESONTList::get_triplet(int idx) const {
+  std::array<int,3> result;
   result[0] = chain_list[idx][0];
   result[1] = idx;
   result[2] = chain_list[idx][1];
@@ -179,9 +169,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) {
   int* ilist = nblist->ilist;
 
   //convert bonds to local id representation
-  array2003<int, 2> tmp_arr;
-  tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt;
-  chain_list.resize(ntot, tmp_arr);
+  chain_list.resize(ntot, {not_cnt,not_cnt});
   for (int ii = 0; ii < nall; ii++) {
     int i = ilist[ii];
     chain_list[i][0] = domain_end;
@@ -224,17 +212,11 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) {
   for (int i = 0; i < nlocal; i++) {
     if (chain_list[i][0] == not_cnt) continue;
     if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end &&
-     g_id[i] < g_id[chain_list[i][0]]) {
-      array2003<int, 2> tmp_c;
-      tmp_c[0] = i; tmp_c[1] = chain_list[i][0];
-      segments.push_back(tmp_c);
-    }
+     g_id[i] < g_id[chain_list[i][0]])
+      segments.push_back({i,chain_list[i][0]});
     if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end &&
-     g_id[i] < g_id[chain_list[i][1]]) {
-      array2003<int, 2> tmp_c;
-       tmp_c[0] = i; tmp_c[1] = chain_list[i][1];
-       segments.push_back(tmp_c);
-    }
+     g_id[i] < g_id[chain_list[i][1]])
+      segments.push_back({i,chain_list[i][1]});
   }
   int nbonds = segments.size();
 
@@ -275,7 +257,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) {
         int idx_next = chain_list[index_list[nb_list[j]]][1];
         if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) ||
          (idx_next == cnt_end) || (idx_next == domain_end)) {
-          array2003<int, 2> chain;
+          std::array<int,2> chain;
           chain[0] = idx_s;
           chain[1] = nb_list[j];
           //make sure that segments having at least one node
@@ -399,7 +381,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) {
   //bending potential
   int n_triplets = ntlist.get_triplets().size();
   for (int i = 0; i < n_triplets; i++) {
-    const array2003<int,3>& t = ntlist.get_triplets()[i];
+    const std::array<int,3>& t = ntlist.get_triplets()[i];
     //idx of nodes of a triplet in sorted representation
     int idx_s0 = ntlist.get_idxb(t[0]);
     int idx_s1 = ntlist.get_idxb(t[1]);
@@ -443,7 +425,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) {
   double Rmax = 0.0;
   Lmax = 0.0;
   for (int i = 0; i < n_segments; i++) {
-    const array2003<int,2>& s = ntlist.get_segments()[i];
+    const std::array<int,2>& s = ntlist.get_segments()[i];
     //idx of a segment end 1 in sorted representation
     int idx_s0 = ntlist.get_idxb(s[0]);
     //idx of a segment end 2 in sorted representation
@@ -472,7 +454,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) {
 
     for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++) {
       //id of the beginning and end of the chain in the sorted representation
-      const array2003<int,2>& chain = ntlist.get_nbs()[i][nc];
+      const std::array<int,2>& chain = ntlist.get_nbs()[i][nc];
       int N = chain[1] - chain[0] + 1;  //number of elements in the chain
       int end1 = ntlist.get_idx(chain[0]);  //chain ends (real representation)
       int end2 = ntlist.get_idx(chain[1]);

From af14739541c9b96d29d7ddfa03dc09de988f21ee Mon Sep 17 00:00:00 2001
From: iafoss <iafoss@yandex.ru>
Date: Mon, 2 Nov 2020 22:09:55 -0500
Subject: [PATCH 27/44] typo

---
 src/USER-MESONT/pair_mesont_tpm.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp
index b92fc16750..1271ebddb6 100644
--- a/src/USER-MESONT/pair_mesont_tpm.cpp
+++ b/src/USER-MESONT/pair_mesont_tpm.cpp
@@ -9,7 +9,6 @@
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
- 2
    Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu
 ------------------------------------------------------------------------- */
 

From df672fe7d4f8e0d229ef2e72f12f38cb75ad005a Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Mon, 9 Nov 2020 12:42:12 -0700
Subject: [PATCH 28/44] Correcting indentation issue in pair_spin_dmi.cpp

---
 src/SPIN/pair_spin_dmi.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp
index d7b7d1b3d9..69a9873303 100644
--- a/src/SPIN/pair_spin_dmi.cpp
+++ b/src/SPIN/pair_spin_dmi.cpp
@@ -257,16 +257,15 @@ void PairSpinDmi::compute(int eflag, int vflag)
         f[i][0] += fi[0];
         f[i][1] += fi[1];
         f[i][2] += fi[2];
-          if (newton_pair || j < nlocal) {
-            f[j][0] -= fi[0];
-            f[j][1] -= fi[1];
-            f[j][2] -= fi[2];
-          }
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= fi[0];
+          f[j][1] -= fi[1];
+          f[j][2] -= fi[2];
+        }
         fm[i][0] += fmi[0];
         fm[i][1] += fmi[1];
         fm[i][2] += fmi[2];
 
-
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
             evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz);
       }

From 5aae2cb44ded9af4596ca10505e9da130747cc48 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 14:03:16 +0100
Subject: [PATCH 29/44] Fix typo in Howto Walls

---
 doc/src/Howto_walls.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/Howto_walls.rst b/doc/src/Howto_walls.rst
index 4d35cd66b3..6e3e22a3f0 100644
--- a/doc/src/Howto_walls.rst
+++ b/doc/src/Howto_walls.rst
@@ -67,5 +67,5 @@ rotate.
 
 The only frictional idealized walls currently in LAMMPS are flat or
 curved surfaces specified by the :doc:`fix wall/gran <fix_wall_gran>`
-command.  At some point we plan to allow regoin surfaces to be used as
+command.  At some point we plan to allow region surfaces to be used as
 frictional walls, as well as triangulated surfaces.

From eae9fea02615b0aaba3e0b92350e78e70f302e94 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 14:04:49 +0100
Subject: [PATCH 30/44] Consistently use "timesteps" instead of "time-steps"

---
 doc/src/atc_output.rst          | 2 +-
 doc/src/fix_filter_corotate.rst | 2 +-
 doc/src/fix_rx.rst              | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/src/atc_output.rst b/doc/src/atc_output.rst
index 3fb1491be1..5003817daa 100644
--- a/doc/src/atc_output.rst
+++ b/doc/src/atc_output.rst
@@ -14,7 +14,7 @@ Syntax
 * AtC fixID = ID of :doc:`fix atc <fix_atc>` instance
 * *output* or *output index* = name of the AtC sub-command
 * filename_prefix = prefix for data files (for *output*)
-* frequency = frequency of output in time-steps (for *output*)
+* frequency = frequency of output in timesteps (for *output*)
 * optional keywords for *output*:
 
   - text = creates text output of index, step and nodal variable values for unique nodes
diff --git a/doc/src/fix_filter_corotate.rst b/doc/src/fix_filter_corotate.rst
index ee608e5361..e33fc0ac4a 100644
--- a/doc/src/fix_filter_corotate.rst
+++ b/doc/src/fix_filter_corotate.rst
@@ -56,7 +56,7 @@ is slightly modified only for the computation of long-range forces. A
 good cluster decomposition constitutes in building clusters which
 contain the fastest covalent bonds inside clusters.
 
-If the clusters are chosen suitably, the :doc:`run_style respa <run_style>` is stable for outer time-steps of at least 8fs.
+If the clusters are chosen suitably, the :doc:`run_style respa <run_style>` is stable for outer timesteps of at least 8fs.
 
 ----------
 
diff --git a/doc/src/fix_rx.rst b/doc/src/fix_rx.rst
index c1a1d0950c..9eab06ffad 100644
--- a/doc/src/fix_rx.rst
+++ b/doc/src/fix_rx.rst
@@ -90,10 +90,10 @@ accepted, *h* is increased by a proportional amount, and the next ODE step is be
 Otherwise, *h* is shrunk and the ODE step is repeated.
 
 Run-time diagnostics are available for the rkf45 ODE solver. The frequency
-(in time-steps) that diagnostics are reported is controlled by the last (optional)
+(in timesteps) that diagnostics are reported is controlled by the last (optional)
 12th argument. A negative frequency means that diagnostics are reported once at the
 end of each run. A positive value N means that the diagnostics are reported once
-per N time-steps.
+per N timesteps.
 
 The diagnostics report the average # of integrator steps and RHS function evaluations
 and run-time per ODE as well as the average/RMS/min/max per process. If the

From ad56e0ca9ff75b7129c1386dc615e490aefcb6f6 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 14:16:12 +0100
Subject: [PATCH 31/44] Fix casing of the word GitHub

---
 .github/CONTRIBUTING.md            | 2 +-
 doc/github-development-workflow.md | 2 +-
 doc/src/Howto_github.rst           | 4 ++--
 lib/kokkos/README.md               | 4 ++--
 lib/quip/README                    | 2 +-
 lib/scafacos/README                | 2 +-
 src/USER-PLUMED/README             | 2 +-
 tools/replica/reorder_remd_traj.py | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 60fe82d86c..62e7186360 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -108,7 +108,7 @@ For bug reports, the next step is that one of the core LAMMPS developers will se
 
 For submitting pull requests, there is a [detailed tutorial](https://lammps.sandia.gov/doc/Howto_github.html) in the LAMMPS manual. Thus only a brief breakdown of the steps is presented here. Please note, that the LAMMPS developers are still reviewing and trying to improve the process. If you are unsure about something, do not hesitate to post a question on the lammps-users mailing list or contact one fo the core LAMMPS developers.
 Immediately after the submission, the LAMMPS continuing integration server at ci.lammps.org will download your submitted branch and perform a simple compilation test, i.e. will test whether your submitted code can be compiled under various conditions. It will also do a check on whether your included documentation translates cleanly. Whether these tests are successful or fail will be recorded. If a test fails, please inspect the corresponding output on the CI server and take the necessary steps, if needed, so that the code can compile cleanly again. The test will be re-run each the pull request is updated with a push to the remote branch on GitHub.
-Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assesment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
+Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assessment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
 You may also receive comments and suggestions on the overall submission or specific details and on occasion specific requests for changes as part of the review. If permitted, also additional changes may be pushed into your pull request branch or a pull request may be filed in your LAMMPS fork on GitHub to include those changes.
 The LAMMPS developer may then decide to assign the pull request to another developer (e.g. when that developer is more knowledgeable about the submitted feature or enhancement or has written the modified code). It may also happen, that additional developers are requested to provide a review and approve the changes. For submissions, that may change the general behavior of LAMMPS, or where a possibility of unwanted side effects exists, additional tests may be requested by the assigned developer.
 If the assigned developer is satisfied and considers the submission ready for inclusion into LAMMPS, the pull request will receive approvals and be merged into the master branch by one of the core LAMMPS developers. After the pull request is merged, you may delete the feature branch used for the pull request in your personal LAMMPS fork.
diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md
index a7d41dd32a..503a33be4e 100644
--- a/doc/github-development-workflow.md
+++ b/doc/github-development-workflow.md
@@ -95,7 +95,7 @@ on the pull request discussion page on GitHub, so that other developers
 can later review the entire discussion after the fact and understand the
 rationale behind choices made.  Exceptions to this policy are technical
 discussions, that are centered on tools or policies themselves
-(git, github, c++) rather than on the content of the pull request.
+(git, GitHub, c++) rather than on the content of the pull request.
 
 ### Checklist for Pull Requests
 
diff --git a/doc/src/Howto_github.rst b/doc/src/Howto_github.rst
index 63cb8945e8..6303feb407 100644
--- a/doc/src/Howto_github.rst
+++ b/doc/src/Howto_github.rst
@@ -72,7 +72,7 @@ explained in more detail here: `feature branch workflow <https://www.atlassian.c
 
 **Feature branches**
 
-First of all, create a clone of your version on github on your local
+First of all, create a clone of your version on GitHub on your local
 machine via HTTPS:
 
 .. code-block:: bash
@@ -155,7 +155,7 @@ useful message that explains the change.
 
 .. code-block:: bash
 
-     $ git commit -m 'Finally updated the github tutorial'
+     $ git commit -m 'Finally updated the GitHub tutorial'
 
 After the commit, the changes can be pushed to the same branch on GitHub:
 
diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md
index a08d238e5d..f9facbe96d 100644
--- a/lib/kokkos/README.md
+++ b/lib/kokkos/README.md
@@ -18,7 +18,7 @@ profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
 
 A programming guide can be found on the Wiki, the API reference is under development.
 
-For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
+For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
 
 For non-public questions send an email to
 crtrott(at)sandia.gov
@@ -44,7 +44,7 @@ To learn more about Kokkos consider watching one of our presentations:
 We are open and try to encourage contributions from external developers.
 To do so please first open an issue describing the contribution and then issue
 a pull request against the develop branch. For larger features it may be good
-to get guidance from the core development team first through the github issue.
+to get guidance from the core development team first through the GitHub issue.
 
 Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
 Which means contributing to Kokkos allows anyone else to use your contributions
diff --git a/lib/quip/README b/lib/quip/README
index e6cc3903bd..bf316d036a 100644
--- a/lib/quip/README
+++ b/lib/quip/README
@@ -17,7 +17,7 @@ Building LAMMPS with QUIP support:
 1) Building QUIP
 1.1) Obtaining QUIP
 
-The most current release of QUIP can be obtained from github:
+The most current release of QUIP can be obtained from GitHub:
 
 $ git clone https://github.com/libAtoms/QUIP.git QUIP
 
diff --git a/lib/scafacos/README b/lib/scafacos/README
index 86335d9f98..9d202d704b 100644
--- a/lib/scafacos/README
+++ b/lib/scafacos/README
@@ -3,7 +3,7 @@ is required to use the KSPACE scafacos and its kspace_style
 scafacos command in a LAMMPS input script.
 
 The ScaFaCoS library is available at http://scafacos.de or
-on github at https://github.com/scafacos, the library was
+on GitHub at https://github.com/scafacos, the library was
 developed by a consortium of different universities in
 Germany (Bonn, Chemnitz, Stuttgart, Wuppertal) and
 the Research Centre Juelich (Juelich Supercomputing Centre).
diff --git a/src/USER-PLUMED/README b/src/USER-PLUMED/README
index f46b2cd9bd..ed166cda90 100644
--- a/src/USER-PLUMED/README
+++ b/src/USER-PLUMED/README
@@ -30,7 +30,7 @@ even if PLUMED is not in the path if as long as the input does not contain a fix
 plumed command.
 
 If you wish to statically link PLUMED you must download PLUMED to the /lib/plumed directory before compiling LAMMPS.  You can
-download a tar ball into that directory or you can clone the plumed2 repository from github there.  Once you have created a
+download a tar ball into that directory or you can clone the plumed2 repository from GitHub there.  Once you have created a
 directory containing a distribution of PLUMED within /lib/plumed you then must build PLUMED within that directory by issuing
 the usual commands.  It is worth noting that we have provided a script that will download and build PLUMED for you with
 a minimal set of options.  To run this script you need to issue the following command:
diff --git a/tools/replica/reorder_remd_traj.py b/tools/replica/reorder_remd_traj.py
index 5033ae1e53..ff525c06b2 100644
--- a/tools/replica/reorder_remd_traj.py
+++ b/tools/replica/reorder_remd_traj.py
@@ -325,7 +325,7 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
               pip install --user pymbar
               sudo pip install pymbar
 
-              To install the dev. version directly from github, use:
+              To install the dev. version directly from use:
               pip install pip install git+https://github.com/choderalab/pymbar.git
               """)
 

From 4be2a99977bb4620b8494c3d1f59af902b9a57aa Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 14:20:52 +0100
Subject: [PATCH 32/44] Fix casing of the word GitHub

---
 .github/CONTRIBUTING.md            |   2 +-
 doc/github-development-workflow.md |   2 +-
 doc/src/Howto_github.rst           |   4 +-
 lib/kokkos/README.md               |   4 +-
 lib/quip/README                    |   2 +-
 lib/scafacos/README                |   2 +-
 src/USER-PLUMED/README             |   2 +-
 tools/replica/reorder_remd_traj.py | 231 +++++++++++++++--------------
 8 files changed, 131 insertions(+), 118 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 60fe82d86c..62e7186360 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -108,7 +108,7 @@ For bug reports, the next step is that one of the core LAMMPS developers will se
 
 For submitting pull requests, there is a [detailed tutorial](https://lammps.sandia.gov/doc/Howto_github.html) in the LAMMPS manual. Thus only a brief breakdown of the steps is presented here. Please note, that the LAMMPS developers are still reviewing and trying to improve the process. If you are unsure about something, do not hesitate to post a question on the lammps-users mailing list or contact one fo the core LAMMPS developers.
 Immediately after the submission, the LAMMPS continuing integration server at ci.lammps.org will download your submitted branch and perform a simple compilation test, i.e. will test whether your submitted code can be compiled under various conditions. It will also do a check on whether your included documentation translates cleanly. Whether these tests are successful or fail will be recorded. If a test fails, please inspect the corresponding output on the CI server and take the necessary steps, if needed, so that the code can compile cleanly again. The test will be re-run each the pull request is updated with a push to the remote branch on GitHub.
-Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assesment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
+Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assessment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
 You may also receive comments and suggestions on the overall submission or specific details and on occasion specific requests for changes as part of the review. If permitted, also additional changes may be pushed into your pull request branch or a pull request may be filed in your LAMMPS fork on GitHub to include those changes.
 The LAMMPS developer may then decide to assign the pull request to another developer (e.g. when that developer is more knowledgeable about the submitted feature or enhancement or has written the modified code). It may also happen, that additional developers are requested to provide a review and approve the changes. For submissions, that may change the general behavior of LAMMPS, or where a possibility of unwanted side effects exists, additional tests may be requested by the assigned developer.
 If the assigned developer is satisfied and considers the submission ready for inclusion into LAMMPS, the pull request will receive approvals and be merged into the master branch by one of the core LAMMPS developers. After the pull request is merged, you may delete the feature branch used for the pull request in your personal LAMMPS fork.
diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md
index a7d41dd32a..c34a67dfcf 100644
--- a/doc/github-development-workflow.md
+++ b/doc/github-development-workflow.md
@@ -95,7 +95,7 @@ on the pull request discussion page on GitHub, so that other developers
 can later review the entire discussion after the fact and understand the
 rationale behind choices made.  Exceptions to this policy are technical
 discussions, that are centered on tools or policies themselves
-(git, github, c++) rather than on the content of the pull request.
+(git, GitHub, c++) rather than on the content of the pull request.
 
 ### Checklist for Pull Requests
 
diff --git a/doc/src/Howto_github.rst b/doc/src/Howto_github.rst
index 63cb8945e8..311d716f18 100644
--- a/doc/src/Howto_github.rst
+++ b/doc/src/Howto_github.rst
@@ -72,7 +72,7 @@ explained in more detail here: `feature branch workflow <https://www.atlassian.c
 
 **Feature branches**
 
-First of all, create a clone of your version on github on your local
+First of all, create a clone of your version on GitHub on your local
 machine via HTTPS:
 
 .. code-block:: bash
@@ -155,7 +155,7 @@ useful message that explains the change.
 
 .. code-block:: bash
 
-     $ git commit -m 'Finally updated the github tutorial'
+     $ git commit -m 'Finally updated the GitHub tutorial'
 
 After the commit, the changes can be pushed to the same branch on GitHub:
 
diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md
index a08d238e5d..f820b7be10 100644
--- a/lib/kokkos/README.md
+++ b/lib/kokkos/README.md
@@ -18,7 +18,7 @@ profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
 
 A programming guide can be found on the Wiki, the API reference is under development.
 
-For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
+For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
 
 For non-public questions send an email to
 crtrott(at)sandia.gov
@@ -44,7 +44,7 @@ To learn more about Kokkos consider watching one of our presentations:
 We are open and try to encourage contributions from external developers.
 To do so please first open an issue describing the contribution and then issue
 a pull request against the develop branch. For larger features it may be good
-to get guidance from the core development team first through the github issue.
+to get guidance from the core development team first through the GitHub issue.
 
 Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
 Which means contributing to Kokkos allows anyone else to use your contributions
diff --git a/lib/quip/README b/lib/quip/README
index e6cc3903bd..5e737db11d 100644
--- a/lib/quip/README
+++ b/lib/quip/README
@@ -17,7 +17,7 @@ Building LAMMPS with QUIP support:
 1) Building QUIP
 1.1) Obtaining QUIP
 
-The most current release of QUIP can be obtained from github:
+The most current release of QUIP can be obtained from GitHub:
 
 $ git clone https://github.com/libAtoms/QUIP.git QUIP
 
diff --git a/lib/scafacos/README b/lib/scafacos/README
index 86335d9f98..c8181ac7ae 100644
--- a/lib/scafacos/README
+++ b/lib/scafacos/README
@@ -3,7 +3,7 @@ is required to use the KSPACE scafacos and its kspace_style
 scafacos command in a LAMMPS input script.
 
 The ScaFaCoS library is available at http://scafacos.de or
-on github at https://github.com/scafacos, the library was
+on GitHub at https://github.com/scafacos, the library was
 developed by a consortium of different universities in
 Germany (Bonn, Chemnitz, Stuttgart, Wuppertal) and
 the Research Centre Juelich (Juelich Supercomputing Centre).
diff --git a/src/USER-PLUMED/README b/src/USER-PLUMED/README
index f46b2cd9bd..31910bc951 100644
--- a/src/USER-PLUMED/README
+++ b/src/USER-PLUMED/README
@@ -30,7 +30,7 @@ even if PLUMED is not in the path if as long as the input does not contain a fix
 plumed command.
 
 If you wish to statically link PLUMED you must download PLUMED to the /lib/plumed directory before compiling LAMMPS.  You can
-download a tar ball into that directory or you can clone the plumed2 repository from github there.  Once you have created a
+download a tar ball into that directory or you can clone the plumed2 repository from GitHub there.  Once you have created a
 directory containing a distribution of PLUMED within /lib/plumed you then must build PLUMED within that directory by issuing
 the usual commands.  It is worth noting that we have provided a script that will download and build PLUMED for you with
 a minimal set of options.  To run this script you need to issue the following command:
diff --git a/tools/replica/reorder_remd_traj.py b/tools/replica/reorder_remd_traj.py
index 5033ae1e53..6eee4770ab 100644
--- a/tools/replica/reorder_remd_traj.py
+++ b/tools/replica/reorder_remd_traj.py
@@ -37,13 +37,17 @@ StringIO (or io if in Python 3.x)
 """
 
 
-
-import os, numpy as np, argparse, time, pickle
+import os
+import numpy as np
+import argparse
+import time
+import pickle
 from scipy.special import logsumexp
 from mpi4py import MPI
 
 from tqdm import tqdm
-import gzip, bz2
+import gzip
+import bz2
 try:
     # python-2
     from StringIO import StringIO as IOBuffer
@@ -52,12 +56,11 @@ except ImportError:
     from io import BytesIO as IOBuffer
 
 
-
 #### INITIALIZE MPI ####
 # (note that all output on screen will be printed only on the ROOT proc)
 ROOT = 0
 comm = MPI.COMM_WORLD
-me = comm.rank # my proc id
+me = comm.rank  # my proc id
 nproc = comm.size
 
 
@@ -77,7 +80,8 @@ def _get_nearest_temp(temps, query_temp):
     out_temp: nearest temp from the list
     """
 
-    if isinstance(temps, list): temps = np.array(temps)
+    if isinstance(temps, list):
+        temps = np.array(temps)
     return temps[np.argmin(np.abs(temps-query_temp))]
 
 
@@ -95,10 +99,10 @@ def readwrite(trajfn, mode):
 
     if trajfn.endswith(".gz"):
         of = gzip.open(trajfn, mode)
-        #return gzip.GzipFile(trajfn, mode)
+        # return gzip.GzipFile(trajfn, mode)
     elif trajfn.endswith(".bz2"):
         of = bz2.open(trajfn, mode)
-        #return bz2.BZ2File(trajfn, mode)
+        # return bz2.BZ2File(trajfn, mode)
     else:
         of = open(trajfn, mode)
     return of
@@ -123,8 +127,8 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
     """
 
     n_rep = len(temps)
-    swap_history = np.loadtxt(logfn, skiprows = 3)
-    master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
+    swap_history = np.loadtxt(logfn, skiprows=3)
+    master_frametuple_dict = dict((n, []) for n in range(n_rep))
 
     # walk through the replicas
     print("Getting frames from all replicas at temperature:")
@@ -136,15 +140,15 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
         if writefreq <= nswap:
             for ii, i in enumerate(rep_inds[:-1]):
                 start = int(ii * nswap / writefreq)
-                stop = int( (ii+1) * nswap / writefreq)
-                [master_frametuple_dict[n].append( (i,x) ) \
-                                        for x in range(start, stop)]
+                stop = int((ii+1) * nswap / writefreq)
+                [master_frametuple_dict[n].append((i, x))
+                 for x in range(start, stop)]
 
         # case-2: when temps. are swapped faster than dumping frames
         else:
             nskip = int(writefreq / nswap)
-            [master_frametuple_dict[n].append( (i,ii) ) \
-            for ii, i in enumerate(rep_inds[0::nskip])]
+            [master_frametuple_dict[n].append((i, ii))
+             for ii, i in enumerate(rep_inds[0::nskip])]
 
     return master_frametuple_dict
 
@@ -161,11 +165,12 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
     """
     for n in rep_inds:
         # check if the byte indices for this traj has already been computed
-        if os.path.isfile(byteindfns[n]): continue
+        if os.path.isfile(byteindfns[n]):
+            continue
 
         # extract bytes
         fobj = readwrite(intrajfns[n], "rb")
-        byteinds = [ [0,0] ]
+        byteinds = [[0, 0]]
 
         # place file pointer at first line
         nframe = 0
@@ -175,33 +180,37 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
         # status printed only for replica read on root proc
         # this assumes that each proc takes roughly the same time
         if me == ROOT:
-            pb = tqdm(desc = "Reading replicas", leave = True,
-                  position = ROOT + 2*me,
-                  unit = "B/replica", unit_scale = True,
-                  unit_divisor = 1024)
+            pb = tqdm(desc="Reading replicas", leave=True,
+                      position=ROOT + 2*me,
+                      unit="B/replica", unit_scale=True,
+                      unit_divisor=1024)
 
         # start crawling through the bytes
         while True:
             next_line = fobj.readline()
-            if len(next_line) == 0: break
+            if len(next_line) == 0:
+                break
             # this will only work with lammpstrj traj format.
             # this condition essentially checks periodic recurrences
             # of the token TIMESTEP. Each time it is found,
             # we have crawled through a frame (snapshot)
             if next_line == first_line:
                 nframe += 1
-                byteinds.append( [nframe, cur_pos] )
-                if me == ROOT: pb.update()
+                byteinds.append([nframe, cur_pos])
+                if me == ROOT:
+                    pb.update()
             cur_pos = fobj.tell()
-            if me == ROOT: pb.update(0)
-        if me == ROOT: pb.close()
+            if me == ROOT:
+                pb.update(0)
+        if me == ROOT:
+            pb.close()
 
         # take care of the EOF
         cur_pos = fobj.tell()
-        byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF
+        byteinds.append([nframe+1, cur_pos])  # dummy index for the EOF
 
         # write to file
-        np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d")
+        np.savetxt(byteindfns[n], np.array(byteinds), fmt="%d")
 
         # close the trajfile object
         fobj.close()
@@ -247,15 +256,15 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
         of = readwrite(outtrajfns[n], "wb")
 
         # get frames
-        abs_temp_ind = np.argmin( abs(temps - outtemps[n]) )
+        abs_temp_ind = np.argmin(abs(temps - outtemps[n]))
         frametuple = frametuple_dict[abs_temp_ind][-nframes:]
 
         # write frames to buffer
         if me == ROOT:
             pb = tqdm(frametuple,
-                  desc = ("Buffering trajectories for writing"),
-                  leave = True, position = ROOT + 2*me,
-                  unit = 'frame/replica', unit_scale = True)
+                      desc=("Buffering trajectories for writing"),
+                      leave=True, position=ROOT + 2*me,
+                      unit='frame/replica', unit_scale=True)
 
             iterable = pb
         else:
@@ -263,20 +272,23 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
 
         for i, (rep, frame) in enumerate(iterable):
             infobj = infobjs[rep]
-            start_ptr = int(byte_inds[rep][frame,1])
-            stop_ptr = int(byte_inds[rep][frame+1,1])
+            start_ptr = int(byte_inds[rep][frame, 1])
+            stop_ptr = int(byte_inds[rep][frame+1, 1])
             byte_len = stop_ptr - start_ptr
             infobj.seek(start_ptr)
             buf.write(infobj.read(byte_len))
-        if me == ROOT: pb.close()
+        if me == ROOT:
+            pb.close()
 
         # write buffer to disk
-        if me == ROOT: print("Writing buffer to file")
+        if me == ROOT:
+            print("Writing buffer to file")
         of.write(buf.getvalue())
         of.close()
         buf.close()
 
-    for i in infobjs: i.close()
+    for i in infobjs:
+        i.close()
 
     return
 
@@ -325,13 +337,13 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
               pip install --user pymbar
               sudo pip install pymbar
 
-              To install the dev. version directly from github, use:
+              To install the dev. version directly from GitHub, use:
               pip install pip install git+https://github.com/choderalab/pymbar.git
               """)
 
     u_rn = np.loadtxt(enefn)
-    ntemps = u_rn.shape[0] # number of temps.
-    nframes = int(nprod / writefreq) # number of frames at each temp.
+    ntemps = u_rn.shape[0]  # number of temps.
+    nframes = int(nprod / writefreq)  # number of frames at each temp.
 
     # reorder the temps
     u_kn = np.zeros([ntemps, nframes], float)
@@ -341,91 +353,90 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
             u_kn[k, i] = u_rn[rep, frame]
 
     # prep input for pymbar
-    #1) array of frames at each temp.
+    # 1) array of frames at each temp.
     nframes_k = nframes * np.ones(ntemps, np.uint8)
 
-    #2) inverse temps. for chosen energy scale
+    # 2) inverse temps. for chosen energy scale
     beta_k = 1.0 / (kB * temps)
 
-    #3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
+    # 3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
     u_kln = np.zeros([ntemps, ntemps, nframes], float)
     for k in range(ntemps):
         u_kln[k] = np.outer(beta_k, u_kn[k])
 
     # run pymbar and extract the free energies
     print("\nRunning pymbar...")
-    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True)
-    f_k = mbar.f_k # (1 x k array)
+    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose=True)
+    f_k = mbar.f_k  # (1 x k array)
 
     # calculate the log-weights
     print("\nExtracting log-weights...")
     log_nframes = np.log(nframes)
-    logw = dict( (k, np.zeros([ntemps, nframes], float)) for k in range(ntemps) )
+    logw = dict((k, np.zeros([ntemps, nframes], float)) for k in range(ntemps))
     # get log-weights to reweight to this temp.
     for k in range(ntemps):
         for n in range(nframes):
-            num = -beta_k[k] * u_kn[k,n]
-            denom = f_k - beta_k[k] * u_kn[k,n]
+            num = -beta_k[k] * u_kn[k, n]
+            denom = f_k - beta_k[k] * u_kn[k, n]
             for l in range(ntemps):
-                logw[l][k,n] = num - logsumexp(denom) - log_nframes
+                logw[l][k, n] = num - logsumexp(denom) - log_nframes
 
     return logw
 
 
-
 #### MAIN WORKFLOW ####
 if __name__ == "__main__":
     # accept user inputs
-    parser = argparse.ArgumentParser(description = __doc__,
-             formatter_class = argparse.RawDescriptionHelpFormatter)
+    parser = argparse.ArgumentParser(description=__doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
 
     parser.add_argument("prefix",
-                        help = "Prefix of REMD LAMMPS trajectories.\
+                        help="Prefix of REMD LAMMPS trajectories.\
                         Supply full path. Trajectories assumed to be named as \
                         <prefix>.%%d.lammpstrj. \
                         Can be in compressed (.gz or .bz2) format. \
                         This is a required argument")
 
-    parser.add_argument("-logfn", "--logfn", default = "log.lammps",
-                        help = "LAMMPS log file that contains swap history \
+    parser.add_argument("-logfn", "--logfn", default="log.lammps",
+                        help="LAMMPS log file that contains swap history \
                         of temperatures among replicas. \
                         Default = 'lammps.log'")
 
-    parser.add_argument("-tfn", "--tempfn", default = "temps.txt",
-                        help = "ascii file (readable by numpy.loadtxt) with \
+    parser.add_argument("-tfn", "--tempfn", default="temps.txt",
+                        help="ascii file (readable by numpy.loadtxt) with \
                         the temperatures used in the REMD simulation.")
 
-    parser.add_argument("-ns", "--nswap", type = int,
-                        help = "Swap frequency used in LAMMPS temper command")
+    parser.add_argument("-ns", "--nswap", type=int,
+                        help="Swap frequency used in LAMMPS temper command")
 
-    parser.add_argument("-nw", "--nwrite", type = int, default = 1,
-                        help = "Trajectory writing frequency used \
+    parser.add_argument("-nw", "--nwrite", type=int, default=1,
+                        help="Trajectory writing frequency used \
                         in LAMMPS dump command")
 
-    parser.add_argument("-np", "--nprod", type = int, default = 0,
-                        help = "Number of timesteps to save in the reordered\
+    parser.add_argument("-np", "--nprod", type=int, default=0,
+                        help="Number of timesteps to save in the reordered\
                         trajectories.\
                         This should be in units of the LAMMPS timestep")
 
-    parser.add_argument("-logw", "--logw", action = 'store_true',
-                        help = "Supplying this flag \
+    parser.add_argument("-logw", "--logw", action='store_true',
+                        help="Supplying this flag \
                         calculates *canonical* (NVT ensemble) log weights")
 
     parser.add_argument("-e", "--enefn",
-                        help = "File that has n_replica x n_frames array\
+                        help="File that has n_replica x n_frames array\
                         of total potential energies")
 
     parser.add_argument("-kB", "--boltzmann_const",
-                        type = float, default = 0.001987,
-                        help = "Boltzmann constant in appropriate units. \
+                        type=float, default=0.001987,
+                        help="Boltzmann constant in appropriate units. \
                         Default is kcal/mol")
 
-    parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
-                        help = "Reorder trajectories at these temperatures.\n \
+    parser.add_argument("-ot", "--out_temps", nargs='+', type=np.float64,
+                        help="Reorder trajectories at these temperatures.\n \
                         Default is all temperatures used in the simulation")
 
-    parser.add_argument("-od", "--outdir", default = ".",
-                        help = "All output will be saved to this directory")
+    parser.add_argument("-od", "--outdir", default=".",
+                        help="All output will be saved to this directory")
 
     # parse inputs
     args = parser.parse_args()
@@ -438,14 +449,16 @@ if __name__ == "__main__":
     nprod = args.nprod
 
     enefn = args.enefn
-    if not enefn is None: enefn = os.path.abspath(enefn)
+    if not enefn is None:
+        enefn = os.path.abspath(enefn)
     get_logw = args.logw
     kB = args.boltzmann_const
 
     out_temps = args.out_temps
     outdir = os.path.abspath(args.outdir)
     if not os.path.isdir(outdir):
-        if me == ROOT: os.mkdir(outdir)
+        if me == ROOT:
+            os.mkdir(outdir)
 
     # check that all input files are present (only on the ROOT proc)
     if me == ROOT:
@@ -465,7 +478,8 @@ if __name__ == "__main__":
     for i in range(ntemps):
         this_intrajfn = intrajfns[i]
         x = this_intrajfn + ".gz"
-        if os.path.isfile(this_intrajfn): continue
+        if os.path.isfile(this_intrajfn):
+            continue
         elif os.path.isfile(this_intrajfn + ".gz"):
             intrajfns[i] = this_intrajfn + ".gz"
         elif os.path.isfile(this_intrajfn + ".bz2"):
@@ -476,42 +490,41 @@ if __name__ == "__main__":
 
     # set output filenames
     outprefix = os.path.join(outdir, traj_prefix.split('/')[-1])
-    outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \
-                 (outprefix, _get_nearest_temp(temps, t)) \
+    outtrajfns = ["%s.%3.2f.lammpstrj.gz" %
+                  (outprefix, _get_nearest_temp(temps, t))
                   for t in out_temps]
-    byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) \
+    byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k)
                   for k in range(ntemps)]
     frametuplefn = outprefix + '.frametuple.pickle'
     if get_logw:
         logwfn = outprefix + ".logw.pickle"
 
-
     # get a list of all frames at a particular temp visited by each replica
     # this is fast so run only on ROOT proc.
     master_frametuple_dict = {}
     if me == ROOT:
-        master_frametuple_dict = get_replica_frames(logfn = logfn,
-                                                    temps = temps,
-                                                    nswap = nswap,
-                                                    writefreq = writefreq)
+        master_frametuple_dict = get_replica_frames(logfn=logfn,
+                                                    temps=temps,
+                                                    nswap=nswap,
+                                                    writefreq=writefreq)
         # save to a pickle from the ROOT proc
         with open(frametuplefn, 'wb') as of:
             pickle.dump(master_frametuple_dict, of)
 
     # broadcast to all procs
-    master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT)
+    master_frametuple_dict = comm.bcast(master_frametuple_dict, root=ROOT)
 
     # define a chunk of replicas  to process on each proc
     CHUNKSIZE_1 = int(ntemps/nproc)
     if me < nproc - 1:
-        my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 )
+        my_rep_inds = range((me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1)
     else:
-        my_rep_inds = range( (me*CHUNKSIZE_1), ntemps )
+        my_rep_inds = range((me*CHUNKSIZE_1), ntemps)
 
     # get byte indices from replica (un-ordered) trajs. in parallel
-    get_byte_index(rep_inds = my_rep_inds,
-                   byteindfns = byteindfns,
-                   intrajfns = intrajfns)
+    get_byte_index(rep_inds=my_rep_inds,
+                   byteindfns=byteindfns,
+                   intrajfns=intrajfns)
 
     # block until all procs have finished
     comm.barrier()
@@ -520,7 +533,7 @@ if __name__ == "__main__":
     infobjs = [readwrite(i, "rb") for i in intrajfns]
 
     # open all byteindex files
-    byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) )
+    byte_inds = dict((i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns))
 
     # define a chunk of output trajs. to process for each proc.
     # # of reordered trajs. to write may be less than the total # of replicas
@@ -536,38 +549,38 @@ if __name__ == "__main__":
     else:
         nproc_active = nproc
     if me < nproc_active-1:
-        my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 )
+        my_temp_inds = range((me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1)
     else:
-        my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps)
+        my_temp_inds = range((me*CHUNKSIZE_2), n_out_temps)
 
     # retire the excess procs
     # dont' forget to close any open file objects
     if me >= nproc_active:
-        for fobj in infobjs: fobj.close()
+        for fobj in infobjs:
+            fobj.close()
         exit()
 
     # write reordered trajectories to disk from active procs in parallel
-    write_reordered_traj(temp_inds = my_temp_inds,
-                         byte_inds = byte_inds,
-                         outtemps = out_temps, temps = temps,
-                         frametuple_dict = master_frametuple_dict,
-                         nprod = nprod, writefreq = writefreq,
-                         outtrajfns = outtrajfns,
-                         infobjs = infobjs)
+    write_reordered_traj(temp_inds=my_temp_inds,
+                         byte_inds=byte_inds,
+                         outtemps=out_temps, temps=temps,
+                         frametuple_dict=master_frametuple_dict,
+                         nprod=nprod, writefreq=writefreq,
+                         outtrajfns=outtrajfns,
+                         infobjs=infobjs)
 
     # calculate canonical log-weights if requested
     # usually this is very fast so retire all but the ROOT proc
-    if not get_logw: exit()
-    if not me == ROOT: exit()
-
-    logw = get_canonical_logw(enefn = enefn, temps = temps,
-                              frametuple_dict = master_frametuple_dict,
-                              nprod = nprod, writefreq = writefreq,
-                              kB = kB)
+    if not get_logw:
+        exit()
+    if not me == ROOT:
+        exit()
 
+    logw = get_canonical_logw(enefn=enefn, temps=temps,
+                              frametuple_dict=master_frametuple_dict,
+                              nprod=nprod, writefreq=writefreq,
+                              kB=kB)
 
     # save the logweights to a pickle
     with open(logwfn, 'wb') as of:
         pickle.dump(logw, of)
-
-

From 2c65df1bc2efd9c39aae3a3ceeca06fecf25b698 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 16:29:02 +0100
Subject: [PATCH 33/44] Revert typo fix in python due to auto-formatter
 changing too much

---
 tools/replica/reorder_remd_traj.py | 231 ++++++++++++++---------------
 1 file changed, 109 insertions(+), 122 deletions(-)

diff --git a/tools/replica/reorder_remd_traj.py b/tools/replica/reorder_remd_traj.py
index 6eee4770ab..5033ae1e53 100644
--- a/tools/replica/reorder_remd_traj.py
+++ b/tools/replica/reorder_remd_traj.py
@@ -37,17 +37,13 @@ StringIO (or io if in Python 3.x)
 """
 
 
-import os
-import numpy as np
-import argparse
-import time
-import pickle
+
+import os, numpy as np, argparse, time, pickle
 from scipy.special import logsumexp
 from mpi4py import MPI
 
 from tqdm import tqdm
-import gzip
-import bz2
+import gzip, bz2
 try:
     # python-2
     from StringIO import StringIO as IOBuffer
@@ -56,11 +52,12 @@ except ImportError:
     from io import BytesIO as IOBuffer
 
 
+
 #### INITIALIZE MPI ####
 # (note that all output on screen will be printed only on the ROOT proc)
 ROOT = 0
 comm = MPI.COMM_WORLD
-me = comm.rank  # my proc id
+me = comm.rank # my proc id
 nproc = comm.size
 
 
@@ -80,8 +77,7 @@ def _get_nearest_temp(temps, query_temp):
     out_temp: nearest temp from the list
     """
 
-    if isinstance(temps, list):
-        temps = np.array(temps)
+    if isinstance(temps, list): temps = np.array(temps)
     return temps[np.argmin(np.abs(temps-query_temp))]
 
 
@@ -99,10 +95,10 @@ def readwrite(trajfn, mode):
 
     if trajfn.endswith(".gz"):
         of = gzip.open(trajfn, mode)
-        # return gzip.GzipFile(trajfn, mode)
+        #return gzip.GzipFile(trajfn, mode)
     elif trajfn.endswith(".bz2"):
         of = bz2.open(trajfn, mode)
-        # return bz2.BZ2File(trajfn, mode)
+        #return bz2.BZ2File(trajfn, mode)
     else:
         of = open(trajfn, mode)
     return of
@@ -127,8 +123,8 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
     """
 
     n_rep = len(temps)
-    swap_history = np.loadtxt(logfn, skiprows=3)
-    master_frametuple_dict = dict((n, []) for n in range(n_rep))
+    swap_history = np.loadtxt(logfn, skiprows = 3)
+    master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
 
     # walk through the replicas
     print("Getting frames from all replicas at temperature:")
@@ -140,15 +136,15 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
         if writefreq <= nswap:
             for ii, i in enumerate(rep_inds[:-1]):
                 start = int(ii * nswap / writefreq)
-                stop = int((ii+1) * nswap / writefreq)
-                [master_frametuple_dict[n].append((i, x))
-                 for x in range(start, stop)]
+                stop = int( (ii+1) * nswap / writefreq)
+                [master_frametuple_dict[n].append( (i,x) ) \
+                                        for x in range(start, stop)]
 
         # case-2: when temps. are swapped faster than dumping frames
         else:
             nskip = int(writefreq / nswap)
-            [master_frametuple_dict[n].append((i, ii))
-             for ii, i in enumerate(rep_inds[0::nskip])]
+            [master_frametuple_dict[n].append( (i,ii) ) \
+            for ii, i in enumerate(rep_inds[0::nskip])]
 
     return master_frametuple_dict
 
@@ -165,12 +161,11 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
     """
     for n in rep_inds:
         # check if the byte indices for this traj has already been computed
-        if os.path.isfile(byteindfns[n]):
-            continue
+        if os.path.isfile(byteindfns[n]): continue
 
         # extract bytes
         fobj = readwrite(intrajfns[n], "rb")
-        byteinds = [[0, 0]]
+        byteinds = [ [0,0] ]
 
         # place file pointer at first line
         nframe = 0
@@ -180,37 +175,33 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
         # status printed only for replica read on root proc
         # this assumes that each proc takes roughly the same time
         if me == ROOT:
-            pb = tqdm(desc="Reading replicas", leave=True,
-                      position=ROOT + 2*me,
-                      unit="B/replica", unit_scale=True,
-                      unit_divisor=1024)
+            pb = tqdm(desc = "Reading replicas", leave = True,
+                  position = ROOT + 2*me,
+                  unit = "B/replica", unit_scale = True,
+                  unit_divisor = 1024)
 
         # start crawling through the bytes
         while True:
             next_line = fobj.readline()
-            if len(next_line) == 0:
-                break
+            if len(next_line) == 0: break
             # this will only work with lammpstrj traj format.
             # this condition essentially checks periodic recurrences
             # of the token TIMESTEP. Each time it is found,
             # we have crawled through a frame (snapshot)
             if next_line == first_line:
                 nframe += 1
-                byteinds.append([nframe, cur_pos])
-                if me == ROOT:
-                    pb.update()
+                byteinds.append( [nframe, cur_pos] )
+                if me == ROOT: pb.update()
             cur_pos = fobj.tell()
-            if me == ROOT:
-                pb.update(0)
-        if me == ROOT:
-            pb.close()
+            if me == ROOT: pb.update(0)
+        if me == ROOT: pb.close()
 
         # take care of the EOF
         cur_pos = fobj.tell()
-        byteinds.append([nframe+1, cur_pos])  # dummy index for the EOF
+        byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF
 
         # write to file
-        np.savetxt(byteindfns[n], np.array(byteinds), fmt="%d")
+        np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d")
 
         # close the trajfile object
         fobj.close()
@@ -256,15 +247,15 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
         of = readwrite(outtrajfns[n], "wb")
 
         # get frames
-        abs_temp_ind = np.argmin(abs(temps - outtemps[n]))
+        abs_temp_ind = np.argmin( abs(temps - outtemps[n]) )
         frametuple = frametuple_dict[abs_temp_ind][-nframes:]
 
         # write frames to buffer
         if me == ROOT:
             pb = tqdm(frametuple,
-                      desc=("Buffering trajectories for writing"),
-                      leave=True, position=ROOT + 2*me,
-                      unit='frame/replica', unit_scale=True)
+                  desc = ("Buffering trajectories for writing"),
+                  leave = True, position = ROOT + 2*me,
+                  unit = 'frame/replica', unit_scale = True)
 
             iterable = pb
         else:
@@ -272,23 +263,20 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
 
         for i, (rep, frame) in enumerate(iterable):
             infobj = infobjs[rep]
-            start_ptr = int(byte_inds[rep][frame, 1])
-            stop_ptr = int(byte_inds[rep][frame+1, 1])
+            start_ptr = int(byte_inds[rep][frame,1])
+            stop_ptr = int(byte_inds[rep][frame+1,1])
             byte_len = stop_ptr - start_ptr
             infobj.seek(start_ptr)
             buf.write(infobj.read(byte_len))
-        if me == ROOT:
-            pb.close()
+        if me == ROOT: pb.close()
 
         # write buffer to disk
-        if me == ROOT:
-            print("Writing buffer to file")
+        if me == ROOT: print("Writing buffer to file")
         of.write(buf.getvalue())
         of.close()
         buf.close()
 
-    for i in infobjs:
-        i.close()
+    for i in infobjs: i.close()
 
     return
 
@@ -337,13 +325,13 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
               pip install --user pymbar
               sudo pip install pymbar
 
-              To install the dev. version directly from GitHub, use:
+              To install the dev. version directly from github, use:
               pip install pip install git+https://github.com/choderalab/pymbar.git
               """)
 
     u_rn = np.loadtxt(enefn)
-    ntemps = u_rn.shape[0]  # number of temps.
-    nframes = int(nprod / writefreq)  # number of frames at each temp.
+    ntemps = u_rn.shape[0] # number of temps.
+    nframes = int(nprod / writefreq) # number of frames at each temp.
 
     # reorder the temps
     u_kn = np.zeros([ntemps, nframes], float)
@@ -353,90 +341,91 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
             u_kn[k, i] = u_rn[rep, frame]
 
     # prep input for pymbar
-    # 1) array of frames at each temp.
+    #1) array of frames at each temp.
     nframes_k = nframes * np.ones(ntemps, np.uint8)
 
-    # 2) inverse temps. for chosen energy scale
+    #2) inverse temps. for chosen energy scale
     beta_k = 1.0 / (kB * temps)
 
-    # 3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
+    #3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
     u_kln = np.zeros([ntemps, ntemps, nframes], float)
     for k in range(ntemps):
         u_kln[k] = np.outer(beta_k, u_kn[k])
 
     # run pymbar and extract the free energies
     print("\nRunning pymbar...")
-    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose=True)
-    f_k = mbar.f_k  # (1 x k array)
+    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True)
+    f_k = mbar.f_k # (1 x k array)
 
     # calculate the log-weights
     print("\nExtracting log-weights...")
     log_nframes = np.log(nframes)
-    logw = dict((k, np.zeros([ntemps, nframes], float)) for k in range(ntemps))
+    logw = dict( (k, np.zeros([ntemps, nframes], float)) for k in range(ntemps) )
     # get log-weights to reweight to this temp.
     for k in range(ntemps):
         for n in range(nframes):
-            num = -beta_k[k] * u_kn[k, n]
-            denom = f_k - beta_k[k] * u_kn[k, n]
+            num = -beta_k[k] * u_kn[k,n]
+            denom = f_k - beta_k[k] * u_kn[k,n]
             for l in range(ntemps):
-                logw[l][k, n] = num - logsumexp(denom) - log_nframes
+                logw[l][k,n] = num - logsumexp(denom) - log_nframes
 
     return logw
 
 
+
 #### MAIN WORKFLOW ####
 if __name__ == "__main__":
     # accept user inputs
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser = argparse.ArgumentParser(description = __doc__,
+             formatter_class = argparse.RawDescriptionHelpFormatter)
 
     parser.add_argument("prefix",
-                        help="Prefix of REMD LAMMPS trajectories.\
+                        help = "Prefix of REMD LAMMPS trajectories.\
                         Supply full path. Trajectories assumed to be named as \
                         <prefix>.%%d.lammpstrj. \
                         Can be in compressed (.gz or .bz2) format. \
                         This is a required argument")
 
-    parser.add_argument("-logfn", "--logfn", default="log.lammps",
-                        help="LAMMPS log file that contains swap history \
+    parser.add_argument("-logfn", "--logfn", default = "log.lammps",
+                        help = "LAMMPS log file that contains swap history \
                         of temperatures among replicas. \
                         Default = 'lammps.log'")
 
-    parser.add_argument("-tfn", "--tempfn", default="temps.txt",
-                        help="ascii file (readable by numpy.loadtxt) with \
+    parser.add_argument("-tfn", "--tempfn", default = "temps.txt",
+                        help = "ascii file (readable by numpy.loadtxt) with \
                         the temperatures used in the REMD simulation.")
 
-    parser.add_argument("-ns", "--nswap", type=int,
-                        help="Swap frequency used in LAMMPS temper command")
+    parser.add_argument("-ns", "--nswap", type = int,
+                        help = "Swap frequency used in LAMMPS temper command")
 
-    parser.add_argument("-nw", "--nwrite", type=int, default=1,
-                        help="Trajectory writing frequency used \
+    parser.add_argument("-nw", "--nwrite", type = int, default = 1,
+                        help = "Trajectory writing frequency used \
                         in LAMMPS dump command")
 
-    parser.add_argument("-np", "--nprod", type=int, default=0,
-                        help="Number of timesteps to save in the reordered\
+    parser.add_argument("-np", "--nprod", type = int, default = 0,
+                        help = "Number of timesteps to save in the reordered\
                         trajectories.\
                         This should be in units of the LAMMPS timestep")
 
-    parser.add_argument("-logw", "--logw", action='store_true',
-                        help="Supplying this flag \
+    parser.add_argument("-logw", "--logw", action = 'store_true',
+                        help = "Supplying this flag \
                         calculates *canonical* (NVT ensemble) log weights")
 
     parser.add_argument("-e", "--enefn",
-                        help="File that has n_replica x n_frames array\
+                        help = "File that has n_replica x n_frames array\
                         of total potential energies")
 
     parser.add_argument("-kB", "--boltzmann_const",
-                        type=float, default=0.001987,
-                        help="Boltzmann constant in appropriate units. \
+                        type = float, default = 0.001987,
+                        help = "Boltzmann constant in appropriate units. \
                         Default is kcal/mol")
 
-    parser.add_argument("-ot", "--out_temps", nargs='+', type=np.float64,
-                        help="Reorder trajectories at these temperatures.\n \
+    parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
+                        help = "Reorder trajectories at these temperatures.\n \
                         Default is all temperatures used in the simulation")
 
-    parser.add_argument("-od", "--outdir", default=".",
-                        help="All output will be saved to this directory")
+    parser.add_argument("-od", "--outdir", default = ".",
+                        help = "All output will be saved to this directory")
 
     # parse inputs
     args = parser.parse_args()
@@ -449,16 +438,14 @@ if __name__ == "__main__":
     nprod = args.nprod
 
     enefn = args.enefn
-    if not enefn is None:
-        enefn = os.path.abspath(enefn)
+    if not enefn is None: enefn = os.path.abspath(enefn)
     get_logw = args.logw
     kB = args.boltzmann_const
 
     out_temps = args.out_temps
     outdir = os.path.abspath(args.outdir)
     if not os.path.isdir(outdir):
-        if me == ROOT:
-            os.mkdir(outdir)
+        if me == ROOT: os.mkdir(outdir)
 
     # check that all input files are present (only on the ROOT proc)
     if me == ROOT:
@@ -478,8 +465,7 @@ if __name__ == "__main__":
     for i in range(ntemps):
         this_intrajfn = intrajfns[i]
         x = this_intrajfn + ".gz"
-        if os.path.isfile(this_intrajfn):
-            continue
+        if os.path.isfile(this_intrajfn): continue
         elif os.path.isfile(this_intrajfn + ".gz"):
             intrajfns[i] = this_intrajfn + ".gz"
         elif os.path.isfile(this_intrajfn + ".bz2"):
@@ -490,41 +476,42 @@ if __name__ == "__main__":
 
     # set output filenames
     outprefix = os.path.join(outdir, traj_prefix.split('/')[-1])
-    outtrajfns = ["%s.%3.2f.lammpstrj.gz" %
-                  (outprefix, _get_nearest_temp(temps, t))
+    outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \
+                 (outprefix, _get_nearest_temp(temps, t)) \
                   for t in out_temps]
-    byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k)
+    byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) \
                   for k in range(ntemps)]
     frametuplefn = outprefix + '.frametuple.pickle'
     if get_logw:
         logwfn = outprefix + ".logw.pickle"
 
+
     # get a list of all frames at a particular temp visited by each replica
     # this is fast so run only on ROOT proc.
     master_frametuple_dict = {}
     if me == ROOT:
-        master_frametuple_dict = get_replica_frames(logfn=logfn,
-                                                    temps=temps,
-                                                    nswap=nswap,
-                                                    writefreq=writefreq)
+        master_frametuple_dict = get_replica_frames(logfn = logfn,
+                                                    temps = temps,
+                                                    nswap = nswap,
+                                                    writefreq = writefreq)
         # save to a pickle from the ROOT proc
         with open(frametuplefn, 'wb') as of:
             pickle.dump(master_frametuple_dict, of)
 
     # broadcast to all procs
-    master_frametuple_dict = comm.bcast(master_frametuple_dict, root=ROOT)
+    master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT)
 
     # define a chunk of replicas  to process on each proc
     CHUNKSIZE_1 = int(ntemps/nproc)
     if me < nproc - 1:
-        my_rep_inds = range((me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1)
+        my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 )
     else:
-        my_rep_inds = range((me*CHUNKSIZE_1), ntemps)
+        my_rep_inds = range( (me*CHUNKSIZE_1), ntemps )
 
     # get byte indices from replica (un-ordered) trajs. in parallel
-    get_byte_index(rep_inds=my_rep_inds,
-                   byteindfns=byteindfns,
-                   intrajfns=intrajfns)
+    get_byte_index(rep_inds = my_rep_inds,
+                   byteindfns = byteindfns,
+                   intrajfns = intrajfns)
 
     # block until all procs have finished
     comm.barrier()
@@ -533,7 +520,7 @@ if __name__ == "__main__":
     infobjs = [readwrite(i, "rb") for i in intrajfns]
 
     # open all byteindex files
-    byte_inds = dict((i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns))
+    byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) )
 
     # define a chunk of output trajs. to process for each proc.
     # # of reordered trajs. to write may be less than the total # of replicas
@@ -549,38 +536,38 @@ if __name__ == "__main__":
     else:
         nproc_active = nproc
     if me < nproc_active-1:
-        my_temp_inds = range((me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1)
+        my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 )
     else:
-        my_temp_inds = range((me*CHUNKSIZE_2), n_out_temps)
+        my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps)
 
     # retire the excess procs
     # dont' forget to close any open file objects
     if me >= nproc_active:
-        for fobj in infobjs:
-            fobj.close()
+        for fobj in infobjs: fobj.close()
         exit()
 
     # write reordered trajectories to disk from active procs in parallel
-    write_reordered_traj(temp_inds=my_temp_inds,
-                         byte_inds=byte_inds,
-                         outtemps=out_temps, temps=temps,
-                         frametuple_dict=master_frametuple_dict,
-                         nprod=nprod, writefreq=writefreq,
-                         outtrajfns=outtrajfns,
-                         infobjs=infobjs)
+    write_reordered_traj(temp_inds = my_temp_inds,
+                         byte_inds = byte_inds,
+                         outtemps = out_temps, temps = temps,
+                         frametuple_dict = master_frametuple_dict,
+                         nprod = nprod, writefreq = writefreq,
+                         outtrajfns = outtrajfns,
+                         infobjs = infobjs)
 
     # calculate canonical log-weights if requested
     # usually this is very fast so retire all but the ROOT proc
-    if not get_logw:
-        exit()
-    if not me == ROOT:
-        exit()
+    if not get_logw: exit()
+    if not me == ROOT: exit()
+
+    logw = get_canonical_logw(enefn = enefn, temps = temps,
+                              frametuple_dict = master_frametuple_dict,
+                              nprod = nprod, writefreq = writefreq,
+                              kB = kB)
 
-    logw = get_canonical_logw(enefn=enefn, temps=temps,
-                              frametuple_dict=master_frametuple_dict,
-                              nprod=nprod, writefreq=writefreq,
-                              kB=kB)
 
     # save the logweights to a pickle
     with open(logwfn, 'wb') as of:
         pickle.dump(logw, of)
+
+

From d1ce362fca80f5240ad9e36c6bd5d65e0c76fea4 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Tue, 10 Nov 2020 17:15:42 +0100
Subject: [PATCH 34/44] Remove wrong word 'regoin' from false positive list

---
 doc/utils/sphinx-config/false_positives.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 3ef0b904eb..6843118686 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -2614,7 +2614,6 @@ Ree
 refactored
 refactoring
 reflectionstyle
-regoin
 Reinders
 reinit
 relaxbox

From 2f3cbfed1304d9c263ed52698fa2ea263f776a40 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 10 Nov 2020 17:58:26 -0500
Subject: [PATCH 35/44] add CMake code to download and compile libyaml if not
 found locally

---
 cmake/Modules/YAML.cmake             | 32 ++++++++++++++++++++++++++++
 unittest/force-styles/CMakeLists.txt |  4 ++--
 2 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 cmake/Modules/YAML.cmake

diff --git a/cmake/Modules/YAML.cmake b/cmake/Modules/YAML.cmake
new file mode 100644
index 0000000000..05163675df
--- /dev/null
+++ b/cmake/Modules/YAML.cmake
@@ -0,0 +1,32 @@
+message(STATUS "Downloading and building YAML library")
+
+include(ExternalProject)
+set(YAML_URL "https://pyyaml.org/download/libyaml/yaml-0.2.5.tar.gz" CACHE STRING "URL for libyaml tarball")
+mark_as_advanced(YAML_URL)
+ExternalProject_Add(libyaml
+                    URL               ${YAML_URL}
+                    URL_MD5           bb15429d8fb787e7d3f1c83ae129a999  
+                    SOURCE_DIR        "${CMAKE_BINARY_DIR}/yaml-src"
+                    BINARY_DIR        "${CMAKE_BINARY_DIR}/yaml-build"
+                    CONFIGURE_COMMAND <SOURCE_DIR>/configure ${CONFIGURE_REQUEST_PIC}
+                                      CXX=${CMAKE_CXX_COMPILER}
+                                      CC=${CMAKE_C_COMPILER}
+                                      --prefix=<INSTALL_DIR> --disable-shared
+                    BUILD_BYPRODUCTS  <INSTALL_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a
+                    TEST_COMMAND      "")
+
+ExternalProject_Get_Property(libyaml INSTALL_DIR)
+set(YAML_INCLUDE_DIR ${INSTALL_DIR}/include)
+set(YAML_LIBRARY_DIR ${INSTALL_DIR}/lib)
+
+# workaround for CMake 3.10 on ubuntu 18.04
+file(MAKE_DIRECTORY ${YAML_INCLUDE_DIR})
+file(MAKE_DIRECTORY ${YAML_LIBRARY_DIR})
+
+set(YAML_LIBRARY_PATH ${INSTALL_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a)
+
+add_library(Yaml::Yaml UNKNOWN IMPORTED)
+set_target_properties(Yaml::Yaml PROPERTIES
+        IMPORTED_LOCATION ${YAML_LIBRARY_PATH}
+        INTERFACE_INCLUDE_DIRECTORIES ${YAML_INCLUDE_DIR})
+add_dependencies(Yaml::Yaml libyaml)
diff --git a/unittest/force-styles/CMakeLists.txt b/unittest/force-styles/CMakeLists.txt
index 128dc62cff..1d7dc937eb 100644
--- a/unittest/force-styles/CMakeLists.txt
+++ b/unittest/force-styles/CMakeLists.txt
@@ -1,8 +1,8 @@
 
 find_package(YAML)
 if(NOT YAML_FOUND)
-  message(STATUS "Skipping tests because libyaml is not found")
-  return()
+  # download and build a local copy of libyaml
+  include(YAML)
 endif()
 
 if(CMAKE_VERSION VERSION_LESS 3.12)

From 2c6ccf0d0f0da1b63221dc34f34457c3480c8223 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 10 Nov 2020 18:04:00 -0500
Subject: [PATCH 36/44] update docs for download and compilation of yaml
 sources

---
 doc/src/Build_development.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst
index cf3e2fb750..1b076caac0 100644
--- a/doc/src/Build_development.rst
+++ b/doc/src/Build_development.rst
@@ -111,8 +111,10 @@ error margin).  The status of this automated testing can be viewed on
 The unit testing facility is integrated into the CMake build process
 of the LAMMPS source code distribution itself.  It can be enabled by
 setting ``-D ENABLE_TESTING=on`` during the CMake configuration step.
-It requires the `PyYAML <http://pyyaml.org/>`_ library and development
-headers to compile and will download and compile a recent version of the
+It requires the `YAML <http://pyyaml.org/>`_ library and development
+headers to compile (if they are not found locally, a recent version
+is downloaded and compiled transparently), and it will also download
+and compile a specific recent version of the
 `Googletest <https://github.com/google/googletest/>`_ C++ test framework
 for implementing the tests.
 

From 552dc7fba90af230c811abaedd96f180e95a2f02 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 10 Nov 2020 18:05:06 -0500
Subject: [PATCH 37/44] whitespace

---
 cmake/Modules/YAML.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/YAML.cmake b/cmake/Modules/YAML.cmake
index 05163675df..a080b566be 100644
--- a/cmake/Modules/YAML.cmake
+++ b/cmake/Modules/YAML.cmake
@@ -5,7 +5,7 @@ set(YAML_URL "https://pyyaml.org/download/libyaml/yaml-0.2.5.tar.gz" CACHE STRIN
 mark_as_advanced(YAML_URL)
 ExternalProject_Add(libyaml
                     URL               ${YAML_URL}
-                    URL_MD5           bb15429d8fb787e7d3f1c83ae129a999  
+                    URL_MD5           bb15429d8fb787e7d3f1c83ae129a999
                     SOURCE_DIR        "${CMAKE_BINARY_DIR}/yaml-src"
                     BINARY_DIR        "${CMAKE_BINARY_DIR}/yaml-build"
                     CONFIGURE_COMMAND <SOURCE_DIR>/configure ${CONFIGURE_REQUEST_PIC}

From 39bc47a4da261b5d7a5db1057cf806d1aae62664 Mon Sep 17 00:00:00 2001
From: Tim Bernhard <tim@bernhard-webstudio.ch>
Date: Thu, 12 Nov 2020 13:35:04 +0100
Subject: [PATCH 38/44] Fix inconsistent formatting in Error & Warning doc

---
 doc/src/Errors_warnings.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/src/Errors_warnings.rst b/doc/src/Errors_warnings.rst
index 306c9b7b31..4f29fad9dd 100644
--- a/doc/src/Errors_warnings.rst
+++ b/doc/src/Errors_warnings.rst
@@ -119,7 +119,6 @@ Doc page with :doc:`ERROR messages <Errors_messages>`
    :doc:`pair style zero <pair_zero>` with a suitable cutoff or use :doc:`comm_modify cutoff <comm_modify>`.
 
 *Communication cutoff is shorter than a bond length based estimate. This may lead to errors.*
-
    Since LAMMPS stores topology data with individual atoms, all atoms
    comprising a bond, angle, dihedral or improper must be present on any
    sub-domain that "owns" the atom with the information, either as a

From 3991f704e1990b827d0bfa69ef5fc425430799e0 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 12 Nov 2020 10:42:09 -0500
Subject: [PATCH 39/44] Fix whitespace errors

---
 doc/src/pair_spin_exchange.rst              | 78 ++++++++++-----------
 src/SPIN/compute_spin.cpp                   |  6 +-
 src/SPIN/pair_spin_dipole_cut.cpp           |  8 +--
 src/SPIN/pair_spin_dipole_long.cpp          |  2 +-
 src/SPIN/pair_spin_dmi.cpp                  |  4 +-
 src/SPIN/pair_spin_exchange.cpp             | 34 ++++-----
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 40 +++++------
 src/SPIN/pair_spin_exchange_biquadratic.h   |  2 +-
 src/SPIN/pair_spin_magelec.cpp              |  4 +-
 9 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst
index 72c416ac72..9e6e534280 100644
--- a/doc/src/pair_spin_exchange.rst
+++ b/doc/src/pair_spin_exchange.rst
@@ -40,53 +40,53 @@ pairs of magnetic spins:
    H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,\vec{s}_i \cdot \vec{s}_j
 
 where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two unit vectors representing
-the magnetic spins of two particles (usually atoms), and 
-:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance 
-between those two particles. The summation is over pairs of nearest neighbors. 
-:math:`J(r_{ij})` is a function defining the intensity and the sign of the 
-exchange interaction for different neighboring shells. 
+the magnetic spins of two particles (usually atoms), and
+:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance
+between those two particles. The summation is over pairs of nearest neighbors.
+:math:`J(r_{ij})` is a function defining the intensity and the sign of the
+exchange interaction for different neighboring shells.
 
-Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction 
+Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction
 between pairs of magnetic spins:
 
 .. math::
-  
+
    H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\,
-                      \vec{s}_{i}\cdot \vec{s}_{j} 
+                      \vec{s}_{i}\cdot \vec{s}_{j}
                       -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\,
-                      \left(\vec{s}_{i}\cdot 
+                      \left(\vec{s}_{i}\cdot
                       \vec{s}_{j}\right)^2
 
-where :math:`\vec{s}_i`,  :math:`\vec{s}_j`,  :math:`r_{ij}` and 
-:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is 
+where :math:`\vec{s}_i`,  :math:`\vec{s}_j`,  :math:`r_{ij}` and
+:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is
 a second function, defining the intensity and the sign of the biquadratic term.
 
-The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both 
+The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both
 interactions above is defined by the following function:
 
 .. math::
 
-    {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d}  \right)^2 
-    \left( 1 - b \left( \frac{r_{ij}}{d}  \right)^2 \right) 
+    {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d}  \right)^2
+    \left( 1 - b \left( \frac{r_{ij}}{d}  \right)^2 \right)
     e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij})
 
-where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients 
-defined in the associated "pair_coeff" command, and :math:`R_c` is the radius 
+where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients
+defined in the associated "pair_coeff" command, and :math:`R_c` is the radius
 cutoff associated to the pair interaction (see below for more explanations).
 
-The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that 
-the function above matches with the value of the exchange interaction for the 
+The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that
+the function above matches with the value of the exchange interaction for the
 :math:`N` neighbor shells taken into account.
-Examples and more explanations about this function and its parameterization 
+Examples and more explanations about this function and its parameterization
 are reported in :ref:`(Tranchida) <Tranchida3>`.
 
-When a *spin/exchange/biquadratic* pair style is defined, six coefficients 
-(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be 
+When a *spin/exchange/biquadratic* pair style is defined, six coefficients
+(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be
 fitted.
 
 From this exchange interaction, each spin :math:`i` will be submitted
-to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be 
-submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see 
+to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be
+submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see
 :doc:`fix nve/spin <fix_nve_spin>`), such as:
 
 .. math::
@@ -94,22 +94,22 @@ submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see
    \vec{\omega}_{i} = \frac{1}{\hbar} \sum_{j}^{Neighb} {J}
    \left(r_{ij} \right)\,\vec{s}_{j}
    ~~{\rm and}~~
-   \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ 
+   \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{
    \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij}
 
 with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij}
 = \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit
 vector between sites :math:`i` and :math:`j`.
-Equivalent forces and magnetic torques are generated for the biquadratic term 
+Equivalent forces and magnetic torques are generated for the biquadratic term
 when a *spin/exchange/biquadratic* pair style is defined.
 
 More details about the derivation of these torques/forces are reported in
 :ref:`(Tranchida) <Tranchida3>`.
 
-For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the 
-following coefficients must be defined for each pair of atoms types via the 
-:doc:`pair_coeff <pair_coeff>` command as in the examples above, or in the data 
-file or restart files read by the :doc:`read_data <read_data>` or 
+For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the
+following coefficients must be defined for each pair of atoms types via the
+:doc:`pair_coeff <pair_coeff>` command as in the examples above, or in the data
+file or restart files read by the :doc:`read_data <read_data>` or
 :doc:`read_restart <read_restart>` commands, and set in the following order:
 
 * :math:`R_c` (distance units)
@@ -129,10 +129,10 @@ for the *spin/exchange* pair style, and:
 
 for the *spin/exchange/biquadratic* pair style.
 
-Note that :math:`R_c` is the radius cutoff of the considered exchange 
-interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients 
-performing the parameterization of the function :math:`J(r_{ij})` defined 
-above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and 
+Note that :math:`R_c` is the radius cutoff of the considered exchange
+interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients
+performing the parameterization of the function :math:`J(r_{ij})` defined
+above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and
 :math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})`
 and :math:`K(r_{ij})` respectively).
 
@@ -147,7 +147,7 @@ None of those coefficients is optional. If not specified, the
 For spin-lattice simulation, it can be useful to offset the
 mechanical forces and energies generated by the exchange
 interaction.
-The *offset* keyword allows to apply this offset. 
+The *offset* keyword allows to apply this offset.
 By setting *offset* to *yes*, the energy definitions above are
 replaced by:
 
@@ -155,14 +155,14 @@ replaced by:
 
    H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,[ \vec{s}_i \cdot \vec{s}_j-1 ]
 
-for the *spin/exchange* pair style, and:  
+for the *spin/exchange* pair style, and:
 
 .. math::
-  
+
    H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\,
                       [ \vec{s}_{i}\cdot \vec{s}_{j} -1 ]
                       -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\,
-                      [ \left(\vec{s}_{i}\cdot 
+                      [ \left(\vec{s}_{i}\cdot
                       \vec{s}_{j}\right)^2 -1]
 
 for the *spin/exchange/biquadratic* pair style.
@@ -173,7 +173,7 @@ precession vectors (and thus does no impact the purely magnetic
 properties).
 This ensures that when all spins are aligned, the magnetic energy
 and the associated mechanical forces (and thus the pressure
-generated by the magnetic potential) are null. 
+generated by the magnetic potential) are null.
 
 .. note::
   This offset term can be very important when calculations such as
@@ -194,7 +194,7 @@ Restrictions
 
 All the *pair/spin* styles are part of the SPIN package.  These styles
 are only enabled if LAMMPS was built with this package, and if the
-atom_style "spin" was declared.  
+atom_style "spin" was declared.
 See the :doc:`Build package <Build_package>` doc page for more info.
 
 Related commands
diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp
index 3e4970a62b..8e44ea7b84 100644
--- a/src/SPIN/compute_spin.cpp
+++ b/src/SPIN/compute_spin.cpp
@@ -178,7 +178,7 @@ void ComputeSpin::compute_vector()
   for (i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       if (atom->sp_flag) {
-        
+
         // compute first moment
 
         mag[0] += sp[i][0];
@@ -223,9 +223,9 @@ void ComputeSpin::compute_vector()
   magtot[1] *= scale;
   magtot[2] *= scale;
   magtot[3] = sqrt((magtot[0]*magtot[0])+(magtot[1]*magtot[1])+(magtot[2]*magtot[2]));
-  
+
   // compute spin temperature
-  
+
   spintemperature = hbar*tempnumtot;
   spintemperature /= (2.0*kb*tempdenomtot);
 
diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp
index b4355fd640..7ba81d93f8 100644
--- a/src/SPIN/pair_spin_dipole_cut.cpp
+++ b/src/SPIN/pair_spin_dipole_cut.cpp
@@ -234,14 +234,14 @@ void PairSpinDipoleCut::compute(int eflag, int vflag)
       local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype];
 
       // compute dipolar interaction
-      
+
       if (rsq < local_cut2) {
         r2inv = 1.0/rsq;
         r3inv = r2inv*rinv;
 
         compute_dipolar(i,j,eij,fmi,spi,spj,r3inv);
-        
-        if (lattice_flag) 
+
+        if (lattice_flag)
           compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv);
 
         if (eflag) {
@@ -269,7 +269,7 @@ void PairSpinDipoleCut::compute(int eflag, int vflag)
       }
     }
   }
-  
+
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp
index 836b889513..3b4c861e0c 100644
--- a/src/SPIN/pair_spin_dipole_long.cpp
+++ b/src/SPIN/pair_spin_dipole_long.cpp
@@ -310,7 +310,7 @@ void PairSpinDipoleLong::compute(int eflag, int vflag)
       }
     }
   }
-  
+
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp
index 69a9873303..e6ed5e4609 100644
--- a/src/SPIN/pair_spin_dmi.cpp
+++ b/src/SPIN/pair_spin_dmi.cpp
@@ -244,7 +244,7 @@ void PairSpinDmi::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_dmi(i,j,eij,fmi,spj);
-        
+
         if (lattice_flag)
           compute_dmi_mech(i,j,rsq,eij,fi,spi,spj);
 
@@ -253,7 +253,7 @@ void PairSpinDmi::compute(int eflag, int vflag)
           evdwl *= 0.5*hbar;
           emag[i] += evdwl;
         } else evdwl = 0.0;
-        
+
         f[i][0] += fi[0];
         f[i][1] += fi[1];
         f[i][2] += fi[2];
diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp
index bccde3f66b..b7dd6ffc17 100644
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@@ -37,8 +37,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairSpinExchange::PairSpinExchange(LAMMPS *lmp) : 
-  PairSpin(lmp) 
+PairSpinExchange::PairSpinExchange(LAMMPS *lmp) :
+  PairSpin(lmp)
 {
   e_offset = 0;
 }
@@ -66,7 +66,7 @@ PairSpinExchange::~PairSpinExchange()
 void PairSpinExchange::settings(int narg, char **arg)
 {
   PairSpin::settings(narg,arg);
-  
+
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_spin_exchange_global = utils::numeric(FLERR,arg[0],false,lmp);
@@ -112,17 +112,17 @@ void PairSpinExchange::coeff(int narg, char **arg)
 
   // read energy offset flag if specified
 
-  while (iarg < narg) { 
-    if (strcmp(arg[7],"offset") == 0) { 
+  while (iarg < narg) {
+    if (strcmp(arg[7],"offset") == 0) {
       if (strcmp(arg[8],"yes") == 0) {
         e_offset = 1;
       } else if  (strcmp(arg[8],"no") == 0) {
         e_offset = 0;
       } else error->all(FLERR,"Incorrect args for pair coefficients");
-      iarg += 2; 
+      iarg += 2;
     } else error->all(FLERR,"Incorrect args for pair coefficients");
   }
-  
+
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
@@ -252,10 +252,10 @@ void PairSpinExchange::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_exchange(i,j,rsq,fmi,spj);
-        
+
         if (lattice_flag)
           compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
-        
+
         if (eflag) {
           evdwl -= compute_energy(i,j,rsq,spi,spj);
           emag[i] += evdwl;
@@ -388,7 +388,7 @@ void PairSpinExchange::compute_exchange(int i, int j, double rsq, double fmi[3],
    compute the mechanical force due to the exchange interaction between atom i and atom j
 ------------------------------------------------------------------------- */
 
-void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, 
+void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
     double eij[3], double fi[3],  double spi[3], double spj[3])
 {
   int *type = atom->type;
@@ -407,11 +407,11 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq,
 
   Jex_mech = 1.0-ra-J2[itype][jtype]*ra*(2.0-ra);
   Jex_mech *= 8.0*Jex*rr*exp(-ra);
-  
+
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
-  
+
   // apply or not energy and force offset
-  
+
   fx = fy = fz = 0.0;
   if (e_offset == 1) { // set offset
     fx = Jex_mech*(sdots-1.0)*eij[0];
@@ -446,17 +446,17 @@ double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3],
   Jex = 4.0*Jex*ra;
   Jex *= (1.0-J2[itype][jtype]*ra);
   Jex *= exp(-ra);
-  
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
+
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
   // apply or not energy and force offset
-  
+
   if (e_offset == 1) { // set offset
     energy = 0.5*Jex*(sdots-1.0);
   } else if (e_offset == 0) { // no offset ("normal" calculation)
     energy = 0.5*Jex*sdots;
   } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command");
-  
+
   return energy;
 }
 
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 36f3dbcf5e..59b959f4cc 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -37,8 +37,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) : 
-  PairSpin(lmp) 
+PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) :
+  PairSpin(lmp)
 {
   e_offset = 0;
 }
@@ -119,14 +119,14 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
 
   // read energy offset flag if specified
 
-  while (iarg < narg) { 
-    if (strcmp(arg[10],"offset") == 0) { 
+  while (iarg < narg) {
+    if (strcmp(arg[10],"offset") == 0) {
       if (strcmp(arg[11],"yes") == 0) {
         e_offset = 1;
       } else if  (strcmp(arg[11],"no") == 0) {
         e_offset = 0;
       } else error->all(FLERR,"Incorrect args for pair coefficients");
-      iarg += 2; 
+      iarg += 2;
     } else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
@@ -267,10 +267,10 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_exchange(i,j,rsq,fmi,spi,spj);
-        
+
         if (lattice_flag)
           compute_exchange_mech(i,j,rsq,eij,fi,spi,spj);
-      
+
         if (eflag) {
           evdwl -= compute_energy(i,j,rsq,spi,spj);
           emag[i] += evdwl;
@@ -384,7 +384,7 @@ void PairSpinExchangeBiquadratic::compute_single_pair(int ii, double fmi[3])
    compute exchange interaction between spins i and j
 ------------------------------------------------------------------------- */
 
-void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, 
+void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
     double fmi[3], double spi[3], double spj[3])
 {
   int *type = atom->type;
@@ -395,7 +395,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
 
   r2j = rsq/J3[itype][jtype]/J3[itype][jtype];
   r2k = rsq/J3[itype][jtype]/J3[itype][jtype];
- 
+
   Jex = 4.0*J1_mag[itype][jtype]*r2j;
   Jex *= (1.0-J2[itype][jtype]*r2j);
   Jex *= exp(-r2j);
@@ -403,7 +403,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
   Kex = 4.0*K1_mag[itype][jtype]*r2k;
   Kex *= (1.0-K2[itype][jtype]*r2k);
   Kex *= exp(-r2k);
-  
+
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
   fmi[0] += (Jex*spj[0] + 2.0*Kex*spj[0]*sdots);
@@ -415,7 +415,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq,
    compute the mechanical force due to the exchange interaction between atom i and atom j
 ------------------------------------------------------------------------- */
 
-void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, 
+void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
     double rsq, double eij[3], double fi[3],  double spi[3], double spj[3])
 {
   int *type = atom->type;
@@ -430,22 +430,22 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
   iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]);
   Kex = K1_mech[itype][jtype];
   iK3 = 1.0/(K3[itype][jtype]*K3[itype][jtype]);
-  
+
   rja = rsq*iJ3;
   rjr = sqrt(rsq)*iJ3;
   rka = rsq*iK3;
   rkr = sqrt(rsq)*iK3;
- 
+
   Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja);
   Jex_mech *= 8.0*Jex*rjr*exp(-rja);
-  
+
   Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka);
   Kex_mech *= 8.0*Kex*rkr*exp(-rka);
 
   sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
   // apply or not energy and force offset
-  
+
   fx = fy = fz = 0.0;
   if (e_offset == 1) { // set offset
     fx = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0];
@@ -469,7 +469,7 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j,
    compute energy of spin pair i and j
 ------------------------------------------------------------------------- */
 
-double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, 
+double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
     double spi[3], double spj[3])
 {
   int *type = atom->type;
@@ -487,7 +487,7 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   rk = ra/K3[itype][jtype];
   r2k = rsq/K3[itype][jtype]/K3[itype][jtype];
   ir3k = 1.0/(rk*rk*rk);
- 
+
   Jex = 4.0*J1_mech[itype][jtype]*r2j;
   Jex *= (1.0-J2[itype][jtype]*r2j);
   Jex *= exp(-r2j);
@@ -496,16 +496,16 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq,
   Kex *= (1.0-K2[itype][jtype]*r2k);
   Kex *= exp(-r2k);
 
-  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);  
+  sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]);
 
   // apply or not energy and force offset
-  
+
   if (e_offset == 1) { // set offset
     energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0));
   } else if (e_offset == 0) { // no offset ("normal" calculation)
     energy = 0.5*(Jex*sdots + Kex*sdots*sdots);
   } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command");
-  
+
   return energy;
 }
 
diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h
index 1074b50f7b..9619416f2e 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.h
+++ b/src/SPIN/pair_spin_exchange_biquadratic.h
@@ -48,7 +48,7 @@ class PairSpinExchangeBiquadratic : public PairSpin {
   double cut_spin_exchange_global;      // global exchange cutoff distance
 
  protected:
-  
+
   int e_offset;                         // apply energy offset
   double **J1_mag;                      // H exchange coeffs in eV
   double **J1_mech;                     // mech exchange coeffs in
diff --git a/src/SPIN/pair_spin_magelec.cpp b/src/SPIN/pair_spin_magelec.cpp
index 72a52c1340..33ad364aaa 100644
--- a/src/SPIN/pair_spin_magelec.cpp
+++ b/src/SPIN/pair_spin_magelec.cpp
@@ -237,7 +237,7 @@ void PairSpinMagelec::compute(int eflag, int vflag)
 
       if (rsq <= local_cut2) {
         compute_magelec(i,j,eij,fmi,spj);
-        
+
         if (lattice_flag)
           compute_magelec_mech(i,j,fi,spi,spj);
 
@@ -246,7 +246,7 @@ void PairSpinMagelec::compute(int eflag, int vflag)
           evdwl *= 0.5*hbar;
           emag[i] += evdwl;
         } else evdwl = 0.0;
-        
+
         f[i][0] += fi[0];
         f[i][1] += fi[1];
         f[i][2] += fi[2];

From e7ccbd0ce61fa6bfea47333fc17361b1c753bab3 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 12 Nov 2020 10:44:04 -0500
Subject: [PATCH 40/44] Replace NULL with nullptr

---
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 30 ++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 59b959f4cc..7cdd8d0c19 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -180,7 +180,7 @@ void *PairSpinExchangeBiquadratic::extract(const char *str, int &dim)
 {
   dim = 0;
   if (strcmp(str,"cut") == 0) return (void *) &cut_spin_exchange_global;
-  return NULL;
+  return nullptr;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -576,19 +576,19 @@ void PairSpinExchangeBiquadratic::read_restart(FILE *fp)
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++) {
     for (j = i; j <= atom->ntypes; j++) {
-      if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,NULL,error);
+      if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,nullptr,error);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
-          utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,NULL,error);
-          utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,NULL,error);
+          utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,nullptr,error);
+          utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,nullptr,error);
         }
         MPI_Bcast(&J1_mag[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&J1_mech[i][j],1,MPI_DOUBLE,0,world);
@@ -624,10 +624,10 @@ void PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp)
 void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
-    utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error);
-    utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error);
-    utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error);
-    utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error);
+    utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,nullptr,error);
+    utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,nullptr,error);
+    utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,nullptr,error);
+    utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,nullptr,error);
   }
   MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&e_offset,1,MPI_INT,0,world);

From c407d547cd702f63a1b8d0d59a0231bebb13b82e Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 12 Nov 2020 10:54:20 -0500
Subject: [PATCH 41/44] Whitespace

---
 src/SPIN/pair_spin_exchange_biquadratic.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp
index 7cdd8d0c19..f2baf1333b 100644
--- a/src/SPIN/pair_spin_exchange_biquadratic.cpp
+++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp
@@ -156,8 +156,7 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg)
 
 double PairSpinExchangeBiquadratic::init_one(int i, int j)
 {
-
-   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   J1_mag[j][i] = J1_mag[i][j];
   J1_mech[j][i] = J1_mech[i][j];

From aadc66877120af9fac50d6e647bbae3dd72e9525 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 12 Nov 2020 10:58:59 -0500
Subject: [PATCH 42/44] Fix pair_spin_exchange doc page title

---
 doc/src/pair_spin_exchange.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst
index 9e6e534280..630ec6608e 100644
--- a/doc/src/pair_spin_exchange.rst
+++ b/doc/src/pair_spin_exchange.rst
@@ -5,7 +5,7 @@ pair_style spin/exchange command
 ================================
 
 pair_style spin/exchange/biquadratic command
-================================
+============================================
 
 Syntax
 """"""
@@ -25,6 +25,7 @@ Examples
    pair_style spin/exchange 4.0
    pair_coeff * * exchange 4.0 0.0446928 0.003496 1.4885
    pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965 offset yes
+
    pair_style spin/exchange/biquadratic 4.0
    pair_coeff * * biquadratic 4.0 0.05 0.03 1.48 0.05 0.03 1.48 offset no
    pair_coeff 1 2 biquadratic 6.0 -0.01 0.0 1.9 0.0 0.1 19

From 497f0dd59358093e11e157e960a6d238ae02df37 Mon Sep 17 00:00:00 2001
From: julient31 <julien.tranchida1@gmail.com>
Date: Thu, 12 Nov 2020 09:43:38 -0700
Subject: [PATCH 43/44] Removing binder and m2,m4 declarations from
 compute/spin

---
 src/SPIN/compute_spin.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp
index 3e4970a62b..c92f24f1ae 100644
--- a/src/SPIN/compute_spin.cpp
+++ b/src/SPIN/compute_spin.cpp
@@ -148,12 +148,10 @@ void ComputeSpin::compute_vector()
   int i;
   int countsp, countsptot;
   double mag[4], magtot[4];
-  double m2, m2tot;
-  double m4, m4tot;
   double magenergy, magenergytot;
   double tempnum, tempnumtot;
   double tempdenom, tempdenomtot;
-  double spintemperature,binder;
+  double spintemperature;
 
   invoked_vector = update->ntimestep;
 

From a48f463faf26cdf9af2f4af589f05138ea30b46a Mon Sep 17 00:00:00 2001
From: Stan Gerald Moore <stamoor@sandia.gov>
Date: Fri, 13 Nov 2020 13:12:50 -0700
Subject: [PATCH 44/44] Fix memory bug in Kokkos KISS FFT

---
 src/KOKKOS/fft3d_kokkos.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp
index 02f55e11fa..04a5512cc7 100644
--- a/src/KOKKOS/fft3d_kokkos.cpp
+++ b/src/KOKKOS/fft3d_kokkos.cpp
@@ -228,7 +228,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
     cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
   #else
     typename FFT_AT::t_FFT_DATA_1d d_tmp =
-     typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0));
+     typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
     kiss_fft_functor<DeviceType> f;
     if (flag == -1)
       f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
@@ -236,7 +236,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
       f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
     Kokkos::parallel_for(total/length,f);
     d_data = d_tmp;
-    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0));
   #endif
 
 
@@ -273,13 +272,13 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
   #elif defined(FFT_CUFFT)
     cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
   #else
+    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
     if (flag == -1)
       f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
     else
       f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
     Kokkos::parallel_for(total/length,f);
     d_data = d_tmp;
-    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0));
   #endif
 
   // 2nd mid-remap to prepare for 3rd FFTs
@@ -315,6 +314,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
   #elif defined(FFT_CUFFT)
     cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
   #else
+    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
     if (flag == -1)
       f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
     else
@@ -866,7 +866,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
   cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
 #else
   kiss_fft_functor<DeviceType> f;
-  typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.extent(0));
+    typename FFT_AT::t_FFT_DATA_1d d_tmp =
+     typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
   if (flag == -1) {
     f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length1);
     Kokkos::parallel_for(total1/length1,f);