From 9a70f4a08c6befc96c13e8609ec64709ee700827 Mon Sep 17 00:00:00 2001
From: David Nicholson <davidanich@gmail.com>
Date: Tue, 10 Jul 2018 17:10:01 -0400
Subject: [PATCH 1/7] modified UEF_utils to compute inverse change of basis

---
 src/USER-UEF/uef_utils.cpp | 63 +++++++++++++++++++++++++-------------
 src/USER-UEF/uef_utils.h   | 26 +++++++++++-----
 2 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index a5498d605f..df418c755b 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -177,7 +177,9 @@ bool UEFBox::reduce()
   // further reduce the box using greedy reduction and check
   // if it changed from the last step using the change of basis
   // matrices r and r0
-  greedy(l,r);
+  int ri[3][3];
+  greedy(l,r,ri);
+  mul_m2(r,ri);
   rotation_matrix(rot,lrot, l);
   return !mat_same(r,r0);
 }
@@ -195,7 +197,8 @@ void UEFBox::set_strain(const double ex, const double ey)
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
-  greedy(l,r);
+  int ri[3][3];
+  greedy(l,r,ri);
   rotation_matrix(rot,lrot, l);
 }
 
@@ -245,28 +248,31 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
 
 
 //sort columns in order of increasing length
-void col_sort(double b[3][3],int r[3][3])
+void col_sort(double b[3][3],int r[3][3],int ri[3][3])
 {
   if (col_prod(b,0,0)>col_prod(b,1,1))
   {
     col_swap(b,0,1);
     col_swap(r,0,1);
+    col_swap(ri,0,1);
   }
   if (col_prod(b,0,0)>col_prod(b,2,2))
   {
     col_swap(b,0,2);
     col_swap(r,0,2);
+    col_swap(ri,0,2);
   }
   if (col_prod(b,1,1)>col_prod(b,2,2))
   {
     col_swap(b,1,2);
     col_swap(r,1,2);
+    col_swap(ri,1,2);
   }
 }
 
 
 // 1-2 reduction (Graham-Schmidt)
-void red12(double b[3][3],int r[3][3])
+void red12(double b[3][3],int r[3][3],int ri[3][3])
 {
   int y = round(col_prod(b,0,1)/col_prod(b,0,0));
   b[0][1] -= y*b[0][0];
@@ -276,16 +282,21 @@ void red12(double b[3][3],int r[3][3])
   r[0][1] -= y*r[0][0];
   r[1][1] -= y*r[1][0];
   r[2][1] -= y*r[2][0];
+
+  ri[0][0] += y*ri[0][1];
+  ri[1][0] += y*ri[1][1];
+  ri[2][0] += y*ri[2][1];
   if (col_prod(b,1,1) < col_prod(b,0,0))
   {
     col_swap(b,0,1);
     col_swap(r,0,1);
-    red12(b,r);
+    col_swap(ri,0,1);
+    red12(b,r,ri);
   }
 }
 
 // The Semaev condition for a 3-reduced basis
-void red3(double b[3][3], int r[3][3])
+void red3(double b[3][3], int r[3][3], int ri[3][3])
 {
   double b11 = col_prod(b,0,0);
   double b22 = col_prod(b,1,1);
@@ -326,41 +337,51 @@ void red3(double b[3][3], int r[3][3])
     r[0][2] += x1*r[0][0] + x2*r[0][1];
     r[1][2] += x1*r[1][0] + x2*r[1][1];
     r[2][2] += x1*r[2][0] + x2*r[2][1];
-    greedy_recurse(b,r); // note the recursion step is here
+
+    ri[0][0] += -x1*ri[0][2];
+    ri[1][0] += -x1*ri[1][2];
+    ri[2][0] += -x1*ri[2][2];
+    ri[0][1] += -x2*ri[0][2];
+    ri[1][1] += -x2*ri[1][2];
+    ri[2][1] += -x2*ri[2][2];
+    greedy_recurse(b,r,ri); // note the recursion step is here
   }
 }
 
 // the meat of the greedy reduction algorithm
-void greedy_recurse(double b[3][3], int r[3][3])
+void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3])
 {
-  col_sort(b,r);
-  red12(b,r);
-  red3(b,r); // recursive caller
+  col_sort(b,r,ri);
+  red12(b,r,ri);
+  red3(b,r,ri); // recursive caller
 }
 
 // set r (change of basis) to be identity then reduce basis and make it unique
-void greedy(double b[3][3],int r[3][3])
+void greedy(double b[3][3],int r[3][3],int ri[3][3])
 {
   r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0;
   r[0][0]=r[1][1]=r[2][2]=1;
-  greedy_recurse(b,r);
-  make_unique(b,r);
+  ri[0][1]=ri[0][2]=ri[1][0]=ri[1][2]=ri[2][0]=ri[2][1]=0;
+  ri[0][0]=ri[1][1]=ri[2][2]=1;
+  greedy_recurse(b,r,ri);
+  make_unique(b,r,ri);
+  transpose(ri);
 }
 
 // A reduced basis isn't unique. This procedure will make it
 // "more" unique. Degenerate cases are possible, but unlikely
 // with floating point math.
-void make_unique(double b[3][3], int r[3][3])
+void make_unique(double b[3][3], int r[3][3], int ri[3][3])
 {
   if (fabs(b[0][0]) < fabs(b[0][1]))
-  { col_swap(b,0,1); col_swap(r,0,1); }
+  { col_swap(b,0,1); col_swap(r,0,1); col_swap(ri,0,1); }
   if (fabs(b[0][0]) < fabs(b[0][2]))
-  { col_swap(b,0,2); col_swap(r,0,2); }
+  { col_swap(b,0,2); col_swap(r,0,2); col_swap(ri,0,2); }
   if (fabs(b[1][1]) < fabs(b[1][2]))
-  { col_swap(b,1,2); col_swap(r,1,2); }
+  { col_swap(b,1,2); col_swap(r,1,2); col_swap(ri,1,2); }
 
-  if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); }
-  if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); }
-  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); }
+  if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); neg_col(ri,0); }
+  if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); neg_col(ri,1); }
+  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); neg_col(ri,2); }
 }
 }}
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index a16f6fff1a..d5b0d6453b 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -30,7 +30,6 @@ class UEFBox
   private:
     double l0[3][3]; // initial basis
     double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms)
-    //double edot[3], delta[2];
     double theta[2];
     double l[3][3], rot[3][3], lrot[3][3];
     int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
@@ -38,12 +37,12 @@ class UEFBox
 
 
 // lattice reduction routines
-void greedy(double[3][3],int[3][3]);
-void col_sort(double[3][3],int[3][3]);
-void red12(double[3][3],int[3][3]);
-void greedy_recurse(double[3][3],int[3][3]);
-void red3(double [3][3],int r[3][3]);
-void make_unique(double[3][3],int[3][3]);
+void greedy(double[3][3],int[3][3],int[3][3]);
+void col_sort(double[3][3],int[3][3],int[3][3]);
+void red12(double[3][3],int[3][3],int[3][3]);
+void greedy_recurse(double[3][3],int[3][3],int[3][3]);
+void red3(double [3][3],int r[3][3],int[3][3]);
+void make_unique(double[3][3],int[3][3],int[3][3]);
 void rotation_matrix(double[3][3],double[3][3],const double [3][3]);
 
 // A few utility functions for 3x3 arrays
@@ -100,6 +99,19 @@ bool mat_same(T x1[3][3], T x2[3][3])
   return v;
 }
 
+template<typename T>
+void transpose(T m[3][3])
+{
+  T t[3][3];
+  for (int k=0;k<3;k++)
+    for (int j=k+1;j<3;j++)
+    {
+      T x = m[k][j];
+      m[k][j] = m[j][k];
+      m[j][k] = x;
+    }
+}
+
 template<typename T>
 void mul_m1(T m1[3][3], const T m2[3][3])
 {

From c3bf7d0971749c330e1f1cae2f53d57872199851 Mon Sep 17 00:00:00 2001
From: David Nicholson <davidanich@gmail.com>
Date: Tue, 10 Jul 2018 19:02:31 -0400
Subject: [PATCH 2/7] added an interface for the inverse c.o.b. matrix to
 UEF_utils

---
 src/USER-UEF/uef_utils.cpp | 29 ++++++++++++++++++++++++-----
 src/USER-UEF/uef_utils.h   | 17 +++++++++--------
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index df418c755b..66164c2647 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -55,6 +55,7 @@ UEFBox::UEFBox()
     {
       l[k][j] = l0[k][j];
       r[j][k]=(j==k);
+      ri[j][k]=(j==k);
     }
 
   // get the initial rotation and upper triangular matrix
@@ -119,6 +120,14 @@ void UEFBox::get_rot(double x[3][3])
       x[k][j]=rot[k][j];
 }
 
+// get inverse change of basis matrix
+void UEFBox::get_inverse_cob(double x[3][3])
+{
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      x[k][j]=ri[k][j];
+}
+
 // diagonal, incompressible deformation
 void UEFBox::step_deform(const double ex, const double ey)
 {
@@ -167,26 +176,37 @@ bool UEFBox::reduce()
   if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0);
 
   // robust reduction to the box defined by Dobson
+  double lc[3][3];
   for (int k=0;k<3;k++)
   {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
+    lc[k][0] = eps*l0[k][0];
+    lc[k][1] = eps*l0[k][1];
+    lc[k][2] = eps*l0[k][2];
   }
+
   // further reduce the box using greedy reduction and check
   // if it changed from the last step using the change of basis
   // matrices r and r0
-  int ri[3][3];
+
   greedy(l,r,ri);
-  mul_m2(r,ri);
+
+  // multiplying the inverse by the old change of basis matrix gives
+  // the inverse of the transformation itself (should be identity if
+  // no reduction takes place). This is used for image flags only.
+
+  mul_m1(ri,r0);
+
   rotation_matrix(rot,lrot, l);
   return !mat_same(r,r0);
 }
 void UEFBox::set_strain(const double ex, const double ey)
 {
-  theta[0]  =winv[0][0]*ex + winv[0][1]*ey;
-  theta[1]  =winv[1][0]*ex + winv[1][1]*ey;
+  theta[0]  = winv[0][0]*ex + winv[0][1]*ey;
+  theta[1]  = winv[1][0]*ex + winv[1][1]*ey;
   theta[0] -= round(theta[0]);
   theta[1] -= round(theta[1]);
 
@@ -197,7 +217,6 @@ void UEFBox::set_strain(const double ex, const double ey)
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
-  int ri[3][3];
   greedy(l,r,ri);
   rotation_matrix(rot,lrot, l);
 }
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index d5b0d6453b..ccfa0f0181 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -27,12 +27,13 @@ class UEFBox
     bool reduce();
     void get_box(double[3][3], double);
     void get_rot(double[3][3]);
+    void get_inverse_cob(double[3][3]);
   private:
     double l0[3][3]; // initial basis
-    double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms)
+    double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms)
     double theta[2];
     double l[3][3], rot[3][3], lrot[3][3];
-    int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
+    int r[3][3],ri[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
 };
 
 
@@ -112,10 +113,10 @@ void transpose(T m[3][3])
     }
 }
 
-template<typename T>
-void mul_m1(T m1[3][3], const T m2[3][3])
+template<typename T1,typename T2>
+void mul_m1(T1 m1[3][3], const T2 m2[3][3])
 {
-  T t[3][3];
+  T1 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m1[k][j];
@@ -125,10 +126,10 @@ void mul_m1(T m1[3][3], const T m2[3][3])
       m1[k][j] = t[k][0]*m2[0][j] + t[k][1]*m2[1][j] + t[k][2]*m2[2][j];
 }
 
-template<typename T>
-void mul_m2(const T m1[3][3], T m2[3][3])
+template<typename T1, typename T2>
+void mul_m2(const T1 m1[3][3], T2 m2[3][3])
 {
-  T t[3][3];
+  T2 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m2[k][j];

From eaf3d1ea9eaa755000d36bb192c36499dda6aa1b Mon Sep 17 00:00:00 2001
From: David Nicholson <davidanich@gmail.com>
Date: Tue, 10 Jul 2018 19:38:18 -0400
Subject: [PATCH 3/7] added an image flag update a la domain->image_flip() to
 FixNHUef::pre_exchange()

---
 src/USER-UEF/fix_nh_uef.cpp | 20 ++++++++++++++++++--
 src/USER-UEF/uef_utils.cpp  |  2 +-
 src/USER-UEF/uef_utils.h    |  2 +-
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp
index cd0b2ba268..bfa4549286 100644
--- a/src/USER-UEF/fix_nh_uef.cpp
+++ b/src/USER-UEF/fix_nh_uef.cpp
@@ -536,10 +536,26 @@ void FixNHUef::pre_exchange()
     rotate_x(rot);
     rotate_f(rot);
 
-    // put all atoms in the new box
-    double **x = atom->x;
+    // this is a generalization of what is done in domain->image_flip(...)
+    int ri[3][3];
+    uefbox->get_inverse_cob(ri);
     imageint *image = atom->image;
     int nlocal = atom->nlocal;
+    for (int i=0; i<nlocal; i++) {
+      int iold[3],inew[3];
+      iold[0] = (image[i] & IMGMASK) - IMGMAX;
+      iold[1] = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+      iold[2] = (image[i] >> IMG2BITS) - IMGMAX;
+      inew[0] = ri[0][0]*iold[0] + ri[0][1]*iold[1] + ri[0][2]*iold[2];
+      inew[1] = ri[1][0]*iold[0] + ri[1][1]*iold[1] + ri[1][2]*iold[2];
+      inew[2] = ri[2][0]*iold[0] + ri[2][1]*iold[1] + ri[2][2]*iold[2];
+      image[i] = ((imageint) (inew[0] + IMGMAX) & IMGMASK) |
+        (((imageint) (inew[1] + IMGMAX) & IMGMASK) << IMGBITS) |
+        (((imageint) (inew[2] + IMGMAX) & IMGMASK) << IMG2BITS);
+    }
+
+    // put all atoms in the new box
+    double **x = atom->x;
     for (int i=0; i<nlocal; i++) domain->remap(x[i],image[i]);
 
     // move atoms to the right processors
diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index 66164c2647..3210a675c6 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -121,7 +121,7 @@ void UEFBox::get_rot(double x[3][3])
 }
 
 // get inverse change of basis matrix
-void UEFBox::get_inverse_cob(double x[3][3])
+void UEFBox::get_inverse_cob(int x[3][3])
 {
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index ccfa0f0181..314c7cf55f 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -27,7 +27,7 @@ class UEFBox
     bool reduce();
     void get_box(double[3][3], double);
     void get_rot(double[3][3]);
-    void get_inverse_cob(double[3][3]);
+    void get_inverse_cob(int[3][3]);
   private:
     double l0[3][3]; // initial basis
     double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms)

From 930215a4b138d4da4a2e8812be772811cd66d132 Mon Sep 17 00:00:00 2001
From: David Nicholson <davidanich@gmail.com>
Date: Tue, 10 Jul 2018 23:10:04 -0400
Subject: [PATCH 4/7] superfluous code removal and formatting changes

---
 src/USER-UEF/uef_utils.cpp | 189 +++++++++++++++++++++++--------------
 src/USER-UEF/uef_utils.h   |   9 +-
 2 files changed, 120 insertions(+), 78 deletions(-)

diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index 3210a675c6..a2e6cb291e 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -30,48 +30,54 @@ namespace LAMMPS_NS {
 
 UEFBox::UEFBox()
 {
+
   // initial box (also an inverse eigenvector matrix of automorphisms)
+
   double x = 0.327985277605681;
   double y = 0.591009048506103;
   double z = 0.736976229099578;
   l0[0][0]= z; l0[0][1]= y; l0[0][2]= x;
   l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y;
   l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z;
+
   // spectra of the two automorpisms (log of eigenvalues)
+
   w1[0]=-1.177725211523360;
   w1[1]=-0.441448620566067;
   w1[2]= 1.619173832089425;
   w2[0]= w1[1];
   w2[1]= w1[2];
   w2[2]= w1[0];
+
   // initialize theta
   // strain = w1 * theta1 + w2 * theta2
+
   theta[0]=theta[1]=0;
 
-
   //set up the initial box l and change of basis matrix r
+
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       l[k][j] = l0[k][j];
       r[j][k]=(j==k);
       ri[j][k]=(j==k);
     }
 
   // get the initial rotation and upper triangular matrix
+
   rotation_matrix(rot, lrot ,l);
 
   // this is just a way to calculate the automorphisms
   // themselves, which play a minor role in the calculations
   // it's overkill, but only called once
+
   double t1[3][3];
   double t1i[3][3];
   double t2[3][3];
   double t2i[3][3];
   double l0t[3][3];
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       t1[k][j] = exp(w1[k])*l0[k][j];
       t1i[k][j] = exp(-w1[k])*l0[k][j];
       t2[k][j] = exp(w2[k])*l0[k][j];
@@ -83,8 +89,7 @@ UEFBox::UEFBox()
   mul_m2(l0t,t2);
   mul_m2(l0t,t2i);
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       a1[k][j] = round(t1[k][j]);
       a1i[k][j] = round(t1i[k][j]);
       a2[k][j] = round(t2[k][j]);
@@ -93,6 +98,7 @@ UEFBox::UEFBox()
 
   // winv used to transform between
   // strain increments and theta increments
+
   winv[0][0] = w2[1];
   winv[0][1] = -w2[0];
   winv[1][0] = -w1[1];
@@ -103,7 +109,9 @@ UEFBox::UEFBox()
       winv[k][j] /= d;
 }
 
-// get volume-correct r basis in: basis*cbrt(vol) = q*r
+/* ----------------------------------------------------------------------
+   get volume-correct r basis in: basis*cbrt(vol) = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_box(double x[3][3], double v)
 {
   v = cbrtf(v);
@@ -112,7 +120,9 @@ void UEFBox::get_box(double x[3][3], double v)
       x[k][j] = lrot[k][j]*v;
 }
 
-// get rotation matrix q in: basis = q*r
+/* ----------------------------------------------------------------------
+   get rotation matrix q in: basis = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_rot(double x[3][3])
 {
   for (int k=0;k<3;k++)
@@ -120,7 +130,9 @@ void UEFBox::get_rot(double x[3][3])
       x[k][j]=rot[k][j];
 }
 
-// get inverse change of basis matrix
+/* ----------------------------------------------------------------------
+   get inverse change of basis matrix
+------------------------------------------------------------------------- */
 void UEFBox::get_inverse_cob(int x[3][3])
 {
   for (int k=0;k<3;k++)
@@ -128,20 +140,22 @@ void UEFBox::get_inverse_cob(int x[3][3])
       x[k][j]=ri[k][j];
 }
 
-// diagonal, incompressible deformation
+/* ----------------------------------------------------------------------
+   apply diagonal, incompressible deformation
+------------------------------------------------------------------------- */
 void UEFBox::step_deform(const double ex, const double ey)
 {
   // increment theta values used in the reduction
+
   theta[0] +=winv[0][0]*ex + winv[0][1]*ey;
   theta[1] +=winv[1][0]*ex + winv[1][1]*ey;
 
-  // deformation of the box. reduce() needs to
-  // be called regularly or calculation will become
-  // unstable
+  // deformation of the box. reduce() needs to be called regularly or 
+  // calculation will become unstable
+
   double eps[3];
   eps[0]=ex; eps[1] = ey; eps[2] = -ex-ey;
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     eps[k] = exp(eps[k]);
     l[k][0] = eps[k]*l[k][0];
     l[k][1] = eps[k]*l[k][1];
@@ -149,43 +163,43 @@ void UEFBox::step_deform(const double ex, const double ey)
   }
   rotation_matrix(rot,lrot, l);
 }
-// reuduce the current basis
+
+/* ----------------------------------------------------------------------
+   reduce the current basis
+------------------------------------------------------------------------- */
 bool UEFBox::reduce()
 {
-  // determine how many times to apply the automorphisms
-  // and find new theta values
+  // determine how many times to apply the automorphisms and find new theta 
+  // values
+
   int f1 = round(theta[0]);
   int f2 = round(theta[1]);
   theta[0] -= f1;
   theta[1] -= f2;
 
-  // store old change or basis matrix to determine if it
-  // changes
+  // store old change or basis matrix to determine if it changes
+
   int r0[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       r0[k][j]=r[k][j];
 
-  // this modifies the old change basis matrix to
-  // handle the case where the automorphism transforms
-  // the box but the reduced basis doesn't change
+  // this modifies the old change basis matrix to handle the case where the 
+  // automorphism transforms the box but the reduced basis doesn't change
   // (r0 should still equal r at the end)
+
   if (f1 > 0) for (int k=0;k<f1;k++) mul_m2 (a1,r0);
   if (f1 < 0) for (int k=0;k<-f1;k++) mul_m2 (a1i,r0);
   if (f2 > 0) for (int k=0;k<f2;k++) mul_m2 (a2,r0);
   if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0);
 
   // robust reduction to the box defined by Dobson
-  double lc[3][3];
-  for (int k=0;k<3;k++)
-  {
+
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
-    lc[k][0] = eps*l0[k][0];
-    lc[k][1] = eps*l0[k][1];
-    lc[k][2] = eps*l0[k][2];
   }
 
   // further reduce the box using greedy reduction and check
@@ -199,10 +213,13 @@ bool UEFBox::reduce()
   // no reduction takes place). This is used for image flags only.
 
   mul_m1(ri,r0);
-
   rotation_matrix(rot,lrot, l);
   return !mat_same(r,r0);
 }
+
+/* ----------------------------------------------------------------------
+   set the strain to a specific value
+------------------------------------------------------------------------- */
 void UEFBox::set_strain(const double ex, const double ey)
 {
   theta[0]  = winv[0][0]*ex + winv[0][1]*ey;
@@ -210,8 +227,7 @@ void UEFBox::set_strain(const double ex, const double ey)
   theta[0] -= round(theta[0]);
   theta[1] -= round(theta[1]);
 
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
@@ -221,9 +237,10 @@ void UEFBox::set_strain(const double ex, const double ey)
   rotation_matrix(rot,lrot, l);
 }
 
-// this is just qr reduction using householder reflections
-// m is input matrix, q is a rotation, r is upper triangular
-// q*m = r
+/* ----------------------------------------------------------------------
+   qr reduction using householder reflections
+   q*m = r. q is orthogonal. m is input matrix. r is upper triangular
+------------------------------------------------------------------------- */
 void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
 {
   for (int k=0;k<3;k++)
@@ -239,8 +256,7 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
   v[0] /= a; v[1] /= a; v[2] /= a;
   double qt[3][3];
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       qt[k][j] = (k==j) - 2*v[k]*v[j];
       q[k][j]= qt[k][j];
     }
@@ -257,40 +273,41 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
       qt[k][j] = (k==j) - 2*v[k]*v[j];
   mul_m2(qt,r);
   mul_m2(qt,q);
+
   // this makes r have positive diagonals
   // q*m = r <==> (-q)*m = (-r) will hold row-wise
+
   if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); }
   if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); }
   if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); }
 }
 
-
-
-//sort columns in order of increasing length
+/* ----------------------------------------------------------------------
+   sort columns of b in order of increasing length
+   mimic column operations on ri and r
+------------------------------------------------------------------------- */
 void col_sort(double b[3][3],int r[3][3],int ri[3][3])
 {
-  if (col_prod(b,0,0)>col_prod(b,1,1))
-  {
+  if (col_prod(b,0,0)>col_prod(b,1,1)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
     col_swap(ri,0,1);
   }
-  if (col_prod(b,0,0)>col_prod(b,2,2))
-  {
+  if (col_prod(b,0,0)>col_prod(b,2,2)) {
     col_swap(b,0,2);
     col_swap(r,0,2);
     col_swap(ri,0,2);
   }
-  if (col_prod(b,1,1)>col_prod(b,2,2))
-  {
+  if (col_prod(b,1,1)>col_prod(b,2,2)) {
     col_swap(b,1,2);
     col_swap(r,1,2);
     col_swap(ri,1,2);
   }
 }
 
-
-// 1-2 reduction (Graham-Schmidt)
+/* ----------------------------------------------------------------------
+   1-2 reduction (Graham-Schmidt)
+------------------------------------------------------------------------- */
 void red12(double b[3][3],int r[3][3],int ri[3][3])
 {
   int y = round(col_prod(b,0,1)/col_prod(b,0,0));
@@ -305,8 +322,8 @@ void red12(double b[3][3],int r[3][3],int ri[3][3])
   ri[0][0] += y*ri[0][1];
   ri[1][0] += y*ri[1][1];
   ri[2][0] += y*ri[2][1];
-  if (col_prod(b,1,1) < col_prod(b,0,0))
-  {
+
+  if (col_prod(b,1,1) < col_prod(b,0,0)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
     col_swap(ri,0,1);
@@ -314,7 +331,9 @@ void red12(double b[3][3],int r[3][3],int ri[3][3])
   }
 }
 
-// The Semaev condition for a 3-reduced basis
+/* ----------------------------------------------------------------------
+   Apply the Semaev condition for a 3-reduced basis
+------------------------------------------------------------------------- */
 void red3(double b[3][3], int r[3][3], int ri[3][3])
 {
   double b11 = col_prod(b,0,0);
@@ -334,29 +353,25 @@ void red3(double b[3][3], int r[3][3], int ri[3][3])
   x1v[0] = floor(y1); x1v[1] = x1v[0]+1;
   x2v[0] = floor(y2); x2v[1] = x2v[0]+1;
   for (int k=0;k<2;k++)
-    for (int j=0;j<2;j++)
-    {
+    for (int j=0;j<2;j++) {
       double a[3];
       a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1];
       a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1];
       a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1];
       double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2];
-      if (val<min)
-      {
+      if (val<min) {
         min = val;
         x1 = x1v[k];
         x2 = x2v[j];
       }
     }
-  if (x1 || x2)
-  {
+  if (x1 || x2) {
     b[0][2] += x1*b[0][0] + x2*b[0][1];
     b[1][2] += x1*b[1][0] + x2*b[1][1];
     b[2][2] += x1*b[2][0] + x2*b[2][1];
     r[0][2] += x1*r[0][0] + x2*r[0][1];
     r[1][2] += x1*r[1][0] + x2*r[1][1];
     r[2][2] += x1*r[2][0] + x2*r[2][1];
-
     ri[0][0] += -x1*ri[0][2];
     ri[1][0] += -x1*ri[1][2];
     ri[2][0] += -x1*ri[2][2];
@@ -367,7 +382,9 @@ void red3(double b[3][3], int r[3][3], int ri[3][3])
   }
 }
 
-// the meat of the greedy reduction algorithm
+/* ----------------------------------------------------------------------
+   the meat of the greedy reduction algorithm
+------------------------------------------------------------------------- */
 void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3])
 {
   col_sort(b,r,ri);
@@ -375,7 +392,10 @@ void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3])
   red3(b,r,ri); // recursive caller
 }
 
-// set r (change of basis) to be identity then reduce basis and make it unique
+/* ----------------------------------------------------------------------
+   reduce the basis b. also output the change of basis matrix r and its
+   inverse ri
+------------------------------------------------------------------------- */
 void greedy(double b[3][3],int r[3][3],int ri[3][3])
 {
   r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0;
@@ -387,20 +407,43 @@ void greedy(double b[3][3],int r[3][3],int ri[3][3])
   transpose(ri);
 }
 
-// A reduced basis isn't unique. This procedure will make it
-// "more" unique. Degenerate cases are possible, but unlikely
-// with floating point math.
+/* ----------------------------------------------------------------------
+   A reduced basis isn't unique. This procedure will make it
+   "more" unique. Degenerate cases are possible, but unlikely
+   with floating point math.
+------------------------------------------------------------------------- */
 void make_unique(double b[3][3], int r[3][3], int ri[3][3])
 {
-  if (fabs(b[0][0]) < fabs(b[0][1]))
-  { col_swap(b,0,1); col_swap(r,0,1); col_swap(ri,0,1); }
-  if (fabs(b[0][0]) < fabs(b[0][2]))
-  { col_swap(b,0,2); col_swap(r,0,2); col_swap(ri,0,2); }
-  if (fabs(b[1][1]) < fabs(b[1][2]))
-  { col_swap(b,1,2); col_swap(r,1,2); col_swap(ri,1,2); }
+  if (fabs(b[0][0]) < fabs(b[0][1])) {
+    col_swap(b,0,1);
+    col_swap(r,0,1);
+    col_swap(ri,0,1); 
+  }
+  if (fabs(b[0][0]) < fabs(b[0][2])) {
+    col_swap(b,0,2);
+    col_swap(r,0,2);
+    col_swap(ri,0,2);
+  }
+  if (fabs(b[1][1]) < fabs(b[1][2])) {
+    col_swap(b,1,2);
+    col_swap(r,1,2);
+    col_swap(ri,1,2);
+  }
 
-  if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); neg_col(ri,0); }
-  if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); neg_col(ri,1); }
-  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); neg_col(ri,2); }
+  if (b[0][0] < 0) {
+    neg_col(b,0);
+    neg_col(r,0);
+    neg_col(ri,0); 
+  }
+  if (b[1][1] < 0) {
+    neg_col(b,1);
+    neg_col(r,1);
+    neg_col(ri,1);
+  }
+  if (det(b) < 0) {
+    neg_col(b,2);
+    neg_col(r,2); 
+    neg_col(ri,2); 
+  }
 }
 }}
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index 314c7cf55f..0a1cfcc9b2 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -36,8 +36,8 @@ class UEFBox
     int r[3][3],ri[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
 };
 
-
 // lattice reduction routines
+
 void greedy(double[3][3],int[3][3],int[3][3]);
 void col_sort(double[3][3],int[3][3],int[3][3]);
 void red12(double[3][3],int[3][3],int[3][3]);
@@ -47,6 +47,7 @@ void make_unique(double[3][3],int[3][3],int[3][3]);
 void rotation_matrix(double[3][3],double[3][3],const double [3][3]);
 
 // A few utility functions for 3x3 arrays
+
 template<typename T>
 T col_prod(T x[3][3], int c1, int c2)
 {
@@ -56,8 +57,7 @@ T col_prod(T x[3][3], int c1, int c2)
 template<typename T>
 void col_swap(T x[3][3], int c1, int c2)
 {
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     T t = x[k][c2];
     x[k][c2]=x[k][c1];
     x[k][c1]=t;
@@ -105,8 +105,7 @@ void transpose(T m[3][3])
 {
   T t[3][3];
   for (int k=0;k<3;k++)
-    for (int j=k+1;j<3;j++)
-    {
+    for (int j=k+1;j<3;j++) {
       T x = m[k][j];
       m[k][j] = m[j][k];
       m[j][k] = x;

From 32917f4caa7d1beb8526d02e9ce2fd247704641e Mon Sep 17 00:00:00 2001
From: Stan Moore <stamoor@sandia.gov>
Date: Mon, 6 Aug 2018 10:50:06 -0600
Subject: [PATCH 5/7] Workaround for issue 1037

---
 lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 49b11f3ae0..11aa695f08 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -292,7 +292,8 @@ public:
 
 #if ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
       if ( 0 == r ) {
-        Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
+        //Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
+        return handle_type();
       }
 #endif
 

From d27215b7e1a303defc72ad3eb1c3b27c60eb67bc Mon Sep 17 00:00:00 2001
From: "Steven J. Plimpton" <sjplimp@singsing.sandia.gov>
Date: Tue, 7 Aug 2018 15:05:07 -0600
Subject: [PATCH 6/7] enable more correct natoms computation when atoms are
 lost

---
 src/fix_group.cpp |  2 +-
 src/group.cpp     | 12 ++++++++++++
 src/group.h       |  1 +
 src/thermo.cpp    |  2 +-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/fix_group.cpp b/src/fix_group.cpp
index 2447002bc5..68d74b8b1f 100644
--- a/src/fix_group.cpp
+++ b/src/fix_group.cpp
@@ -84,7 +84,7 @@ idregion(NULL), idvar(NULL), idprop(NULL)
       idprop = new char[n];
       strcpy(idprop,arg[iarg+1]);
       iarg += 2;
-        } else if (strcmp(arg[iarg],"every") == 0) {
+    } else if (strcmp(arg[iarg],"every") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal group command");
       nevery = force->inumeric(FLERR,arg[iarg+1]);
       if (nevery <= 0) error->all(FLERR,"Illegal group command");
diff --git a/src/group.cpp b/src/group.cpp
index 9d33da9acb..dd5e53bb3c 100644
--- a/src/group.cpp
+++ b/src/group.cpp
@@ -754,6 +754,18 @@ void Group::read_restart(FILE *fp)
 // computations on a group of atoms
 // ----------------------------------------------------------------------
 
+/* ----------------------------------------------------------------------
+   count atoms in group all
+------------------------------------------------------------------------- */
+
+bigint Group::count_all()
+{
+  bigint nme = atom->nlocal;
+  bigint nall;
+  MPI_Allreduce(&nme,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world);
+  return nall;
+}
+
 /* ----------------------------------------------------------------------
    count atoms in group
 ------------------------------------------------------------------------- */
diff --git a/src/group.h b/src/group.h
index 6b6cbb1def..962d37b32a 100644
--- a/src/group.h
+++ b/src/group.h
@@ -37,6 +37,7 @@ class Group : protected Pointers {
   void write_restart(FILE *);
   void read_restart(FILE *);
 
+  bigint count_all();                      // count atoms in group all
   bigint count(int);                       // count atoms in group
   bigint count(int,int);                   // count atoms in group & region
   double mass(int);                        // total mass of atoms in group
diff --git a/src/thermo.cpp b/src/thermo.cpp
index ade7a3c333..ddbbd0f496 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -1698,7 +1698,7 @@ void Thermo::compute_timeremain()
 
 void Thermo::compute_atoms()
 {
-  bivalue = atom->natoms;
+  bivalue = group->count_all();
 }
 
 /* ---------------------------------------------------------------------- */

From ac7aeb68626c61b484591dee82b12b95b4de0da0 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Tue, 7 Aug 2018 21:43:59 -0400
Subject: [PATCH 7/7] Add extra check for OpenCL timers

Fixes issue #1034 by preventing time() to access non-existent OpenCL events
---
 lib/gpu/geryon/ocl_timer.h | 40 +++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/gpu/geryon/ocl_timer.h b/lib/gpu/geryon/ocl_timer.h
index 1f56aeb364..bdfec64f54 100644
--- a/lib/gpu/geryon/ocl_timer.h
+++ b/lib/gpu/geryon/ocl_timer.h
@@ -38,8 +38,8 @@ namespace ucl_opencl {
 /// Class for timing OpenCL events
 class UCL_Timer {
  public:
-  inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
-  inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
+  inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { }
+  inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false)
     { init(dev); }
 
   inline ~UCL_Timer() { clear(); }
@@ -52,6 +52,7 @@ class UCL_Timer {
       _initialized=false;
       _total_time=0.0;
     }
+    has_measured_time = false;
   }
 
   /// Initialize default command queue for timing
@@ -64,25 +65,39 @@ class UCL_Timer {
     _cq=cq;
     clRetainCommandQueue(_cq);
     _initialized=true;
+    has_measured_time = false;
   }
 
   /// Start timing on default command queue
-  inline void start() { UCL_OCL_MARKER(_cq,&start_event); }
+  inline void start() {
+    UCL_OCL_MARKER(_cq,&start_event);
+    has_measured_time = false;
+  }
 
   /// Stop timing on default command queue
-  inline void stop() { UCL_OCL_MARKER(_cq,&stop_event); }
+  inline void stop() {
+    UCL_OCL_MARKER(_cq,&stop_event);
+    has_measured_time = true;
+  }
 
   /// Block until the start event has been reached on device
-  inline void sync_start()
-    { CL_SAFE_CALL(clWaitForEvents(1,&start_event)); }
+  inline void sync_start() {
+    CL_SAFE_CALL(clWaitForEvents(1,&start_event));
+    has_measured_time = false;
+  }
 
   /// Block until the stop event has been reached on device
-  inline void sync_stop()
-    { CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); }
+  inline void sync_stop() {
+    CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
+    has_measured_time = true;
+  }
 
   /// Set the time elapsed to zero (not the total_time)
-  inline void zero()
-    { UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&stop_event); }
+  inline void zero() {
+    has_measured_time = false;
+    UCL_OCL_MARKER(_cq,&start_event);
+    UCL_OCL_MARKER(_cq,&stop_event);
+  }
 
   /// Set the total time to zero
   inline void zero_total() { _total_time=0.0; }
@@ -97,6 +112,7 @@ class UCL_Timer {
 
   /// Return the time (ms) of last start to stop - Forces synchronization
   inline double time() {
+    if(!has_measured_time) return 0.0;
     cl_ulong tstart,tend;
     CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
     CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@@ -107,6 +123,7 @@ class UCL_Timer {
                                          sizeof(cl_ulong), &tstart, NULL));
     clReleaseEvent(start_event);
     clReleaseEvent(stop_event);
+    has_measured_time = false;
     return (tend-tstart)*t_factor;
   }
 
@@ -123,8 +140,9 @@ class UCL_Timer {
   cl_event start_event, stop_event;
   cl_command_queue _cq;
   double _total_time;
-  bool _initialized;
   double t_factor;
+  bool _initialized;
+  bool has_measured_time;
 };
 
 } // namespace