From 9a70f4a08c6befc96c13e8609ec64709ee700827 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 10 Jul 2018 17:10:01 -0400 Subject: [PATCH 1/7] modified UEF_utils to compute inverse change of basis --- src/USER-UEF/uef_utils.cpp | 63 +++++++++++++++++++++++++------------- src/USER-UEF/uef_utils.h | 26 +++++++++++----- 2 files changed, 61 insertions(+), 28 deletions(-) diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp index a5498d605f..df418c755b 100644 --- a/src/USER-UEF/uef_utils.cpp +++ b/src/USER-UEF/uef_utils.cpp @@ -177,7 +177,9 @@ bool UEFBox::reduce() // further reduce the box using greedy reduction and check // if it changed from the last step using the change of basis // matrices r and r0 - greedy(l,r); + int ri[3][3]; + greedy(l,r,ri); + mul_m2(r,ri); rotation_matrix(rot,lrot, l); return !mat_same(r,r0); } @@ -195,7 +197,8 @@ void UEFBox::set_strain(const double ex, const double ey) l[k][1] = eps*l0[k][1]; l[k][2] = eps*l0[k][2]; } - greedy(l,r); + int ri[3][3]; + greedy(l,r,ri); rotation_matrix(rot,lrot, l); } @@ -245,28 +248,31 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3]) //sort columns in order of increasing length -void col_sort(double b[3][3],int r[3][3]) +void col_sort(double b[3][3],int r[3][3],int ri[3][3]) { if (col_prod(b,0,0)>col_prod(b,1,1)) { col_swap(b,0,1); col_swap(r,0,1); + col_swap(ri,0,1); } if (col_prod(b,0,0)>col_prod(b,2,2)) { col_swap(b,0,2); col_swap(r,0,2); + col_swap(ri,0,2); } if (col_prod(b,1,1)>col_prod(b,2,2)) { col_swap(b,1,2); col_swap(r,1,2); + col_swap(ri,1,2); } } // 1-2 reduction (Graham-Schmidt) -void red12(double b[3][3],int r[3][3]) +void red12(double b[3][3],int r[3][3],int ri[3][3]) { int y = round(col_prod(b,0,1)/col_prod(b,0,0)); b[0][1] -= y*b[0][0]; @@ -276,16 +282,21 @@ void red12(double b[3][3],int r[3][3]) r[0][1] -= y*r[0][0]; r[1][1] -= y*r[1][0]; r[2][1] -= y*r[2][0]; + + ri[0][0] += y*ri[0][1]; + ri[1][0] += y*ri[1][1]; + ri[2][0] += y*ri[2][1]; if (col_prod(b,1,1) < col_prod(b,0,0)) { col_swap(b,0,1); col_swap(r,0,1); - red12(b,r); + col_swap(ri,0,1); + red12(b,r,ri); } } // The Semaev condition for a 3-reduced basis -void red3(double b[3][3], int r[3][3]) +void red3(double b[3][3], int r[3][3], int ri[3][3]) { double b11 = col_prod(b,0,0); double b22 = col_prod(b,1,1); @@ -326,41 +337,51 @@ void red3(double b[3][3], int r[3][3]) r[0][2] += x1*r[0][0] + x2*r[0][1]; r[1][2] += x1*r[1][0] + x2*r[1][1]; r[2][2] += x1*r[2][0] + x2*r[2][1]; - greedy_recurse(b,r); // note the recursion step is here + + ri[0][0] += -x1*ri[0][2]; + ri[1][0] += -x1*ri[1][2]; + ri[2][0] += -x1*ri[2][2]; + ri[0][1] += -x2*ri[0][2]; + ri[1][1] += -x2*ri[1][2]; + ri[2][1] += -x2*ri[2][2]; + greedy_recurse(b,r,ri); // note the recursion step is here } } // the meat of the greedy reduction algorithm -void greedy_recurse(double b[3][3], int r[3][3]) +void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3]) { - col_sort(b,r); - red12(b,r); - red3(b,r); // recursive caller + col_sort(b,r,ri); + red12(b,r,ri); + red3(b,r,ri); // recursive caller } // set r (change of basis) to be identity then reduce basis and make it unique -void greedy(double b[3][3],int r[3][3]) +void greedy(double b[3][3],int r[3][3],int ri[3][3]) { r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0; r[0][0]=r[1][1]=r[2][2]=1; - greedy_recurse(b,r); - make_unique(b,r); + ri[0][1]=ri[0][2]=ri[1][0]=ri[1][2]=ri[2][0]=ri[2][1]=0; + ri[0][0]=ri[1][1]=ri[2][2]=1; + greedy_recurse(b,r,ri); + make_unique(b,r,ri); + transpose(ri); } // A reduced basis isn't unique. This procedure will make it // "more" unique. Degenerate cases are possible, but unlikely // with floating point math. -void make_unique(double b[3][3], int r[3][3]) +void make_unique(double b[3][3], int r[3][3], int ri[3][3]) { if (fabs(b[0][0]) < fabs(b[0][1])) - { col_swap(b,0,1); col_swap(r,0,1); } + { col_swap(b,0,1); col_swap(r,0,1); col_swap(ri,0,1); } if (fabs(b[0][0]) < fabs(b[0][2])) - { col_swap(b,0,2); col_swap(r,0,2); } + { col_swap(b,0,2); col_swap(r,0,2); col_swap(ri,0,2); } if (fabs(b[1][1]) < fabs(b[1][2])) - { col_swap(b,1,2); col_swap(r,1,2); } + { col_swap(b,1,2); col_swap(r,1,2); col_swap(ri,1,2); } - if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); } - if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); } - if (det(b) < 0){ neg_col(b,2); neg_col(r,2); } + if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); neg_col(ri,0); } + if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); neg_col(ri,1); } + if (det(b) < 0){ neg_col(b,2); neg_col(r,2); neg_col(ri,2); } } }} diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h index a16f6fff1a..d5b0d6453b 100644 --- a/src/USER-UEF/uef_utils.h +++ b/src/USER-UEF/uef_utils.h @@ -30,7 +30,6 @@ class UEFBox private: double l0[3][3]; // initial basis double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms) - //double edot[3], delta[2]; double theta[2]; double l[3][3], rot[3][3], lrot[3][3]; int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3]; @@ -38,12 +37,12 @@ class UEFBox // lattice reduction routines -void greedy(double[3][3],int[3][3]); -void col_sort(double[3][3],int[3][3]); -void red12(double[3][3],int[3][3]); -void greedy_recurse(double[3][3],int[3][3]); -void red3(double [3][3],int r[3][3]); -void make_unique(double[3][3],int[3][3]); +void greedy(double[3][3],int[3][3],int[3][3]); +void col_sort(double[3][3],int[3][3],int[3][3]); +void red12(double[3][3],int[3][3],int[3][3]); +void greedy_recurse(double[3][3],int[3][3],int[3][3]); +void red3(double [3][3],int r[3][3],int[3][3]); +void make_unique(double[3][3],int[3][3],int[3][3]); void rotation_matrix(double[3][3],double[3][3],const double [3][3]); // A few utility functions for 3x3 arrays @@ -100,6 +99,19 @@ bool mat_same(T x1[3][3], T x2[3][3]) return v; } +template +void transpose(T m[3][3]) +{ + T t[3][3]; + for (int k=0;k<3;k++) + for (int j=k+1;j<3;j++) + { + T x = m[k][j]; + m[k][j] = m[j][k]; + m[j][k] = x; + } +} + template void mul_m1(T m1[3][3], const T m2[3][3]) { From c3bf7d0971749c330e1f1cae2f53d57872199851 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 10 Jul 2018 19:02:31 -0400 Subject: [PATCH 2/7] added an interface for the inverse c.o.b. matrix to UEF_utils --- src/USER-UEF/uef_utils.cpp | 29 ++++++++++++++++++++++++----- src/USER-UEF/uef_utils.h | 17 +++++++++-------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp index df418c755b..66164c2647 100644 --- a/src/USER-UEF/uef_utils.cpp +++ b/src/USER-UEF/uef_utils.cpp @@ -55,6 +55,7 @@ UEFBox::UEFBox() { l[k][j] = l0[k][j]; r[j][k]=(j==k); + ri[j][k]=(j==k); } // get the initial rotation and upper triangular matrix @@ -119,6 +120,14 @@ void UEFBox::get_rot(double x[3][3]) x[k][j]=rot[k][j]; } +// get inverse change of basis matrix +void UEFBox::get_inverse_cob(double x[3][3]) +{ + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + x[k][j]=ri[k][j]; +} + // diagonal, incompressible deformation void UEFBox::step_deform(const double ex, const double ey) { @@ -167,26 +176,37 @@ bool UEFBox::reduce() if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0); // robust reduction to the box defined by Dobson + double lc[3][3]; for (int k=0;k<3;k++) { double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]); l[k][0] = eps*l0[k][0]; l[k][1] = eps*l0[k][1]; l[k][2] = eps*l0[k][2]; + lc[k][0] = eps*l0[k][0]; + lc[k][1] = eps*l0[k][1]; + lc[k][2] = eps*l0[k][2]; } + // further reduce the box using greedy reduction and check // if it changed from the last step using the change of basis // matrices r and r0 - int ri[3][3]; + greedy(l,r,ri); - mul_m2(r,ri); + + // multiplying the inverse by the old change of basis matrix gives + // the inverse of the transformation itself (should be identity if + // no reduction takes place). This is used for image flags only. + + mul_m1(ri,r0); + rotation_matrix(rot,lrot, l); return !mat_same(r,r0); } void UEFBox::set_strain(const double ex, const double ey) { - theta[0] =winv[0][0]*ex + winv[0][1]*ey; - theta[1] =winv[1][0]*ex + winv[1][1]*ey; + theta[0] = winv[0][0]*ex + winv[0][1]*ey; + theta[1] = winv[1][0]*ex + winv[1][1]*ey; theta[0] -= round(theta[0]); theta[1] -= round(theta[1]); @@ -197,7 +217,6 @@ void UEFBox::set_strain(const double ex, const double ey) l[k][1] = eps*l0[k][1]; l[k][2] = eps*l0[k][2]; } - int ri[3][3]; greedy(l,r,ri); rotation_matrix(rot,lrot, l); } diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h index d5b0d6453b..ccfa0f0181 100644 --- a/src/USER-UEF/uef_utils.h +++ b/src/USER-UEF/uef_utils.h @@ -27,12 +27,13 @@ class UEFBox bool reduce(); void get_box(double[3][3], double); void get_rot(double[3][3]); + void get_inverse_cob(double[3][3]); private: double l0[3][3]; // initial basis - double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms) + double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms) double theta[2]; double l[3][3], rot[3][3], lrot[3][3]; - int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3]; + int r[3][3],ri[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3]; }; @@ -112,10 +113,10 @@ void transpose(T m[3][3]) } } -template -void mul_m1(T m1[3][3], const T m2[3][3]) +template +void mul_m1(T1 m1[3][3], const T2 m2[3][3]) { - T t[3][3]; + T1 t[3][3]; for (int k=0;k<3;k++) for (int j=0;j<3;j++) t[k][j]=m1[k][j]; @@ -125,10 +126,10 @@ void mul_m1(T m1[3][3], const T m2[3][3]) m1[k][j] = t[k][0]*m2[0][j] + t[k][1]*m2[1][j] + t[k][2]*m2[2][j]; } -template -void mul_m2(const T m1[3][3], T m2[3][3]) +template +void mul_m2(const T1 m1[3][3], T2 m2[3][3]) { - T t[3][3]; + T2 t[3][3]; for (int k=0;k<3;k++) for (int j=0;j<3;j++) t[k][j]=m2[k][j]; From eaf3d1ea9eaa755000d36bb192c36499dda6aa1b Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 10 Jul 2018 19:38:18 -0400 Subject: [PATCH 3/7] added an image flag update a la domain->image_flip() to FixNHUef::pre_exchange() --- src/USER-UEF/fix_nh_uef.cpp | 20 ++++++++++++++++++-- src/USER-UEF/uef_utils.cpp | 2 +- src/USER-UEF/uef_utils.h | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp index cd0b2ba268..bfa4549286 100644 --- a/src/USER-UEF/fix_nh_uef.cpp +++ b/src/USER-UEF/fix_nh_uef.cpp @@ -536,10 +536,26 @@ void FixNHUef::pre_exchange() rotate_x(rot); rotate_f(rot); - // put all atoms in the new box - double **x = atom->x; + // this is a generalization of what is done in domain->image_flip(...) + int ri[3][3]; + uefbox->get_inverse_cob(ri); imageint *image = atom->image; int nlocal = atom->nlocal; + for (int i=0; i> IMGBITS & IMGMASK) - IMGMAX; + iold[2] = (image[i] >> IMG2BITS) - IMGMAX; + inew[0] = ri[0][0]*iold[0] + ri[0][1]*iold[1] + ri[0][2]*iold[2]; + inew[1] = ri[1][0]*iold[0] + ri[1][1]*iold[1] + ri[1][2]*iold[2]; + inew[2] = ri[2][0]*iold[0] + ri[2][1]*iold[1] + ri[2][2]*iold[2]; + image[i] = ((imageint) (inew[0] + IMGMAX) & IMGMASK) | + (((imageint) (inew[1] + IMGMAX) & IMGMASK) << IMGBITS) | + (((imageint) (inew[2] + IMGMAX) & IMGMASK) << IMG2BITS); + } + + // put all atoms in the new box + double **x = atom->x; for (int i=0; iremap(x[i],image[i]); // move atoms to the right processors diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp index 66164c2647..3210a675c6 100644 --- a/src/USER-UEF/uef_utils.cpp +++ b/src/USER-UEF/uef_utils.cpp @@ -121,7 +121,7 @@ void UEFBox::get_rot(double x[3][3]) } // get inverse change of basis matrix -void UEFBox::get_inverse_cob(double x[3][3]) +void UEFBox::get_inverse_cob(int x[3][3]) { for (int k=0;k<3;k++) for (int j=0;j<3;j++) diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h index ccfa0f0181..314c7cf55f 100644 --- a/src/USER-UEF/uef_utils.h +++ b/src/USER-UEF/uef_utils.h @@ -27,7 +27,7 @@ class UEFBox bool reduce(); void get_box(double[3][3], double); void get_rot(double[3][3]); - void get_inverse_cob(double[3][3]); + void get_inverse_cob(int[3][3]); private: double l0[3][3]; // initial basis double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms) From 930215a4b138d4da4a2e8812be772811cd66d132 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 10 Jul 2018 23:10:04 -0400 Subject: [PATCH 4/7] superfluous code removal and formatting changes --- src/USER-UEF/uef_utils.cpp | 189 +++++++++++++++++++++++-------------- src/USER-UEF/uef_utils.h | 9 +- 2 files changed, 120 insertions(+), 78 deletions(-) diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp index 3210a675c6..a2e6cb291e 100644 --- a/src/USER-UEF/uef_utils.cpp +++ b/src/USER-UEF/uef_utils.cpp @@ -30,48 +30,54 @@ namespace LAMMPS_NS { UEFBox::UEFBox() { + // initial box (also an inverse eigenvector matrix of automorphisms) + double x = 0.327985277605681; double y = 0.591009048506103; double z = 0.736976229099578; l0[0][0]= z; l0[0][1]= y; l0[0][2]= x; l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y; l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z; + // spectra of the two automorpisms (log of eigenvalues) + w1[0]=-1.177725211523360; w1[1]=-0.441448620566067; w1[2]= 1.619173832089425; w2[0]= w1[1]; w2[1]= w1[2]; w2[2]= w1[0]; + // initialize theta // strain = w1 * theta1 + w2 * theta2 + theta[0]=theta[1]=0; - //set up the initial box l and change of basis matrix r + for (int k=0;k<3;k++) - for (int j=0;j<3;j++) - { + for (int j=0;j<3;j++) { l[k][j] = l0[k][j]; r[j][k]=(j==k); ri[j][k]=(j==k); } // get the initial rotation and upper triangular matrix + rotation_matrix(rot, lrot ,l); // this is just a way to calculate the automorphisms // themselves, which play a minor role in the calculations // it's overkill, but only called once + double t1[3][3]; double t1i[3][3]; double t2[3][3]; double t2i[3][3]; double l0t[3][3]; for (int k=0; k<3; ++k) - for (int j=0; j<3; ++j) - { + for (int j=0; j<3; ++j) { t1[k][j] = exp(w1[k])*l0[k][j]; t1i[k][j] = exp(-w1[k])*l0[k][j]; t2[k][j] = exp(w2[k])*l0[k][j]; @@ -83,8 +89,7 @@ UEFBox::UEFBox() mul_m2(l0t,t2); mul_m2(l0t,t2i); for (int k=0; k<3; ++k) - for (int j=0; j<3; ++j) - { + for (int j=0; j<3; ++j) { a1[k][j] = round(t1[k][j]); a1i[k][j] = round(t1i[k][j]); a2[k][j] = round(t2[k][j]); @@ -93,6 +98,7 @@ UEFBox::UEFBox() // winv used to transform between // strain increments and theta increments + winv[0][0] = w2[1]; winv[0][1] = -w2[0]; winv[1][0] = -w1[1]; @@ -103,7 +109,9 @@ UEFBox::UEFBox() winv[k][j] /= d; } -// get volume-correct r basis in: basis*cbrt(vol) = q*r +/* ---------------------------------------------------------------------- + get volume-correct r basis in: basis*cbrt(vol) = q*r +------------------------------------------------------------------------- */ void UEFBox::get_box(double x[3][3], double v) { v = cbrtf(v); @@ -112,7 +120,9 @@ void UEFBox::get_box(double x[3][3], double v) x[k][j] = lrot[k][j]*v; } -// get rotation matrix q in: basis = q*r +/* ---------------------------------------------------------------------- + get rotation matrix q in: basis = q*r +------------------------------------------------------------------------- */ void UEFBox::get_rot(double x[3][3]) { for (int k=0;k<3;k++) @@ -120,7 +130,9 @@ void UEFBox::get_rot(double x[3][3]) x[k][j]=rot[k][j]; } -// get inverse change of basis matrix +/* ---------------------------------------------------------------------- + get inverse change of basis matrix +------------------------------------------------------------------------- */ void UEFBox::get_inverse_cob(int x[3][3]) { for (int k=0;k<3;k++) @@ -128,20 +140,22 @@ void UEFBox::get_inverse_cob(int x[3][3]) x[k][j]=ri[k][j]; } -// diagonal, incompressible deformation +/* ---------------------------------------------------------------------- + apply diagonal, incompressible deformation +------------------------------------------------------------------------- */ void UEFBox::step_deform(const double ex, const double ey) { // increment theta values used in the reduction + theta[0] +=winv[0][0]*ex + winv[0][1]*ey; theta[1] +=winv[1][0]*ex + winv[1][1]*ey; - // deformation of the box. reduce() needs to - // be called regularly or calculation will become - // unstable + // deformation of the box. reduce() needs to be called regularly or + // calculation will become unstable + double eps[3]; eps[0]=ex; eps[1] = ey; eps[2] = -ex-ey; - for (int k=0;k<3;k++) - { + for (int k=0;k<3;k++) { eps[k] = exp(eps[k]); l[k][0] = eps[k]*l[k][0]; l[k][1] = eps[k]*l[k][1]; @@ -149,43 +163,43 @@ void UEFBox::step_deform(const double ex, const double ey) } rotation_matrix(rot,lrot, l); } -// reuduce the current basis + +/* ---------------------------------------------------------------------- + reduce the current basis +------------------------------------------------------------------------- */ bool UEFBox::reduce() { - // determine how many times to apply the automorphisms - // and find new theta values + // determine how many times to apply the automorphisms and find new theta + // values + int f1 = round(theta[0]); int f2 = round(theta[1]); theta[0] -= f1; theta[1] -= f2; - // store old change or basis matrix to determine if it - // changes + // store old change or basis matrix to determine if it changes + int r0[3][3]; for (int k=0;k<3;k++) for (int j=0;j<3;j++) r0[k][j]=r[k][j]; - // this modifies the old change basis matrix to - // handle the case where the automorphism transforms - // the box but the reduced basis doesn't change + // this modifies the old change basis matrix to handle the case where the + // automorphism transforms the box but the reduced basis doesn't change // (r0 should still equal r at the end) + if (f1 > 0) for (int k=0;k 0) for (int k=0;k (-q)*m = (-r) will hold row-wise + if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); } if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); } if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); } } - - -//sort columns in order of increasing length +/* ---------------------------------------------------------------------- + sort columns of b in order of increasing length + mimic column operations on ri and r +------------------------------------------------------------------------- */ void col_sort(double b[3][3],int r[3][3],int ri[3][3]) { - if (col_prod(b,0,0)>col_prod(b,1,1)) - { + if (col_prod(b,0,0)>col_prod(b,1,1)) { col_swap(b,0,1); col_swap(r,0,1); col_swap(ri,0,1); } - if (col_prod(b,0,0)>col_prod(b,2,2)) - { + if (col_prod(b,0,0)>col_prod(b,2,2)) { col_swap(b,0,2); col_swap(r,0,2); col_swap(ri,0,2); } - if (col_prod(b,1,1)>col_prod(b,2,2)) - { + if (col_prod(b,1,1)>col_prod(b,2,2)) { col_swap(b,1,2); col_swap(r,1,2); col_swap(ri,1,2); } } - -// 1-2 reduction (Graham-Schmidt) +/* ---------------------------------------------------------------------- + 1-2 reduction (Graham-Schmidt) +------------------------------------------------------------------------- */ void red12(double b[3][3],int r[3][3],int ri[3][3]) { int y = round(col_prod(b,0,1)/col_prod(b,0,0)); @@ -305,8 +322,8 @@ void red12(double b[3][3],int r[3][3],int ri[3][3]) ri[0][0] += y*ri[0][1]; ri[1][0] += y*ri[1][1]; ri[2][0] += y*ri[2][1]; - if (col_prod(b,1,1) < col_prod(b,0,0)) - { + + if (col_prod(b,1,1) < col_prod(b,0,0)) { col_swap(b,0,1); col_swap(r,0,1); col_swap(ri,0,1); @@ -314,7 +331,9 @@ void red12(double b[3][3],int r[3][3],int ri[3][3]) } } -// The Semaev condition for a 3-reduced basis +/* ---------------------------------------------------------------------- + Apply the Semaev condition for a 3-reduced basis +------------------------------------------------------------------------- */ void red3(double b[3][3], int r[3][3], int ri[3][3]) { double b11 = col_prod(b,0,0); @@ -334,29 +353,25 @@ void red3(double b[3][3], int r[3][3], int ri[3][3]) x1v[0] = floor(y1); x1v[1] = x1v[0]+1; x2v[0] = floor(y2); x2v[1] = x2v[0]+1; for (int k=0;k<2;k++) - for (int j=0;j<2;j++) - { + for (int j=0;j<2;j++) { double a[3]; a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1]; a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1]; a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1]; double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2]; - if (val T col_prod(T x[3][3], int c1, int c2) { @@ -56,8 +57,7 @@ T col_prod(T x[3][3], int c1, int c2) template void col_swap(T x[3][3], int c1, int c2) { - for (int k=0;k<3;k++) - { + for (int k=0;k<3;k++) { T t = x[k][c2]; x[k][c2]=x[k][c1]; x[k][c1]=t; @@ -105,8 +105,7 @@ void transpose(T m[3][3]) { T t[3][3]; for (int k=0;k<3;k++) - for (int j=k+1;j<3;j++) - { + for (int j=k+1;j<3;j++) { T x = m[k][j]; m[k][j] = m[j][k]; m[j][k] = x; From 32917f4caa7d1beb8526d02e9ce2fd247704641e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 6 Aug 2018 10:50:06 -0600 Subject: [PATCH 5/7] Workaround for issue 1037 --- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index 49b11f3ae0..11aa695f08 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -292,7 +292,8 @@ public: #if ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ) if ( 0 == r ) { - Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory"); + //Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory"); + return handle_type(); } #endif From d27215b7e1a303defc72ad3eb1c3b27c60eb67bc Mon Sep 17 00:00:00 2001 From: "Steven J. Plimpton" Date: Tue, 7 Aug 2018 15:05:07 -0600 Subject: [PATCH 6/7] enable more correct natoms computation when atoms are lost --- src/fix_group.cpp | 2 +- src/group.cpp | 12 ++++++++++++ src/group.h | 1 + src/thermo.cpp | 2 +- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/fix_group.cpp b/src/fix_group.cpp index 2447002bc5..68d74b8b1f 100644 --- a/src/fix_group.cpp +++ b/src/fix_group.cpp @@ -84,7 +84,7 @@ idregion(NULL), idvar(NULL), idprop(NULL) idprop = new char[n]; strcpy(idprop,arg[iarg+1]); iarg += 2; - } else if (strcmp(arg[iarg],"every") == 0) { + } else if (strcmp(arg[iarg],"every") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal group command"); nevery = force->inumeric(FLERR,arg[iarg+1]); if (nevery <= 0) error->all(FLERR,"Illegal group command"); diff --git a/src/group.cpp b/src/group.cpp index 9d33da9acb..dd5e53bb3c 100644 --- a/src/group.cpp +++ b/src/group.cpp @@ -754,6 +754,18 @@ void Group::read_restart(FILE *fp) // computations on a group of atoms // ---------------------------------------------------------------------- +/* ---------------------------------------------------------------------- + count atoms in group all +------------------------------------------------------------------------- */ + +bigint Group::count_all() +{ + bigint nme = atom->nlocal; + bigint nall; + MPI_Allreduce(&nme,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world); + return nall; +} + /* ---------------------------------------------------------------------- count atoms in group ------------------------------------------------------------------------- */ diff --git a/src/group.h b/src/group.h index 6b6cbb1def..962d37b32a 100644 --- a/src/group.h +++ b/src/group.h @@ -37,6 +37,7 @@ class Group : protected Pointers { void write_restart(FILE *); void read_restart(FILE *); + bigint count_all(); // count atoms in group all bigint count(int); // count atoms in group bigint count(int,int); // count atoms in group & region double mass(int); // total mass of atoms in group diff --git a/src/thermo.cpp b/src/thermo.cpp index ade7a3c333..ddbbd0f496 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1698,7 +1698,7 @@ void Thermo::compute_timeremain() void Thermo::compute_atoms() { - bivalue = atom->natoms; + bivalue = group->count_all(); } /* ---------------------------------------------------------------------- */ From ac7aeb68626c61b484591dee82b12b95b4de0da0 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Tue, 7 Aug 2018 21:43:59 -0400 Subject: [PATCH 7/7] Add extra check for OpenCL timers Fixes issue #1034 by preventing time() to access non-existent OpenCL events --- lib/gpu/geryon/ocl_timer.h | 40 +++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/lib/gpu/geryon/ocl_timer.h b/lib/gpu/geryon/ocl_timer.h index 1f56aeb364..bdfec64f54 100644 --- a/lib/gpu/geryon/ocl_timer.h +++ b/lib/gpu/geryon/ocl_timer.h @@ -38,8 +38,8 @@ namespace ucl_opencl { /// Class for timing OpenCL events class UCL_Timer { public: - inline UCL_Timer() : _total_time(0.0f), _initialized(false) { } - inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false) + inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { } + inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false) { init(dev); } inline ~UCL_Timer() { clear(); } @@ -52,6 +52,7 @@ class UCL_Timer { _initialized=false; _total_time=0.0; } + has_measured_time = false; } /// Initialize default command queue for timing @@ -64,25 +65,39 @@ class UCL_Timer { _cq=cq; clRetainCommandQueue(_cq); _initialized=true; + has_measured_time = false; } /// Start timing on default command queue - inline void start() { UCL_OCL_MARKER(_cq,&start_event); } + inline void start() { + UCL_OCL_MARKER(_cq,&start_event); + has_measured_time = false; + } /// Stop timing on default command queue - inline void stop() { UCL_OCL_MARKER(_cq,&stop_event); } + inline void stop() { + UCL_OCL_MARKER(_cq,&stop_event); + has_measured_time = true; + } /// Block until the start event has been reached on device - inline void sync_start() - { CL_SAFE_CALL(clWaitForEvents(1,&start_event)); } + inline void sync_start() { + CL_SAFE_CALL(clWaitForEvents(1,&start_event)); + has_measured_time = false; + } /// Block until the stop event has been reached on device - inline void sync_stop() - { CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); } + inline void sync_stop() { + CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); + has_measured_time = true; + } /// Set the time elapsed to zero (not the total_time) - inline void zero() - { UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&stop_event); } + inline void zero() { + has_measured_time = false; + UCL_OCL_MARKER(_cq,&start_event); + UCL_OCL_MARKER(_cq,&stop_event); + } /// Set the total time to zero inline void zero_total() { _total_time=0.0; } @@ -97,6 +112,7 @@ class UCL_Timer { /// Return the time (ms) of last start to stop - Forces synchronization inline double time() { + if(!has_measured_time) return 0.0; cl_ulong tstart,tend; CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); CL_SAFE_CALL(clGetEventProfilingInfo(stop_event, @@ -107,6 +123,7 @@ class UCL_Timer { sizeof(cl_ulong), &tstart, NULL)); clReleaseEvent(start_event); clReleaseEvent(stop_event); + has_measured_time = false; return (tend-tstart)*t_factor; } @@ -123,8 +140,9 @@ class UCL_Timer { cl_event start_event, stop_event; cl_command_queue _cq; double _total_time; - bool _initialized; double t_factor; + bool _initialized; + bool has_measured_time; }; } // namespace