Merge pull request #1598 from alxvov/OSO

Adding the conjugate gradient algorithm and L-BFGS to the SPIN package
2019-10-05 14:11:17 +02:00
parent 043606d351 ed467d8867
commit 65edb2a601
35 changed files with 2068 additions and 108 deletions
--- a/doc/src/Eqs/norm_inf.jpg
+++ b/doc/src/Eqs/norm_inf.jpg
--- a/doc/src/Eqs/norm_inf.tex
+++ b/doc/src/Eqs/norm_inf.tex
@ -0,0 +1,15 @@
+\documentclass[preview]{standalone}
+\usepackage{varwidth}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath, amssymb, graphics, setspace}
+
+\begin{document}
+\begin{varwidth}{50in}
+  \begin{equation}
+    || \vec{F} ||_{inf} 
+    = {\rm max}\left(|F_1^1|, |F_1^2|, |F_1^3| \cdots, 
+    |F_N^1|, |F_N^2|, |F_N^3|\right)
+    \nonumber
+  \end{equation}
+\end{varwidth}
+\end{document}
--- a/doc/src/Eqs/norm_max.jpg
+++ b/doc/src/Eqs/norm_max.jpg
--- a/doc/src/Eqs/norm_max.tex
+++ b/doc/src/Eqs/norm_max.tex
@ -0,0 +1,15 @@
+\documentclass[preview]{standalone}
+\usepackage{varwidth}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath, amssymb, graphics, setspace}
+
+\begin{document}
+\begin{varwidth}{50in}
+  \begin{equation}
+    % \left| \left| \vec{F} \right| \right|_2 
+    || \vec{F} ||_{max} 
+    = {\rm max}\left(||\vec{F}_1||, \cdots, ||\vec{F}_N||\right)
+    \nonumber
+  \end{equation}
+\end{varwidth}
+\end{document}
--- a/doc/src/Eqs/norm_two.jpg
+++ b/doc/src/Eqs/norm_two.jpg
--- a/doc/src/Eqs/norm_two.tex
+++ b/doc/src/Eqs/norm_two.tex
@ -0,0 +1,15 @@
+\documentclass[preview]{standalone}
+\usepackage{varwidth}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath, amssymb, graphics, setspace}
+
+\begin{document}
+\begin{varwidth}{50in}
+  \begin{equation}
+    % \left| \left| \vec{F} \right| \right|_2 
+    || \vec{F} ||_{2} 
+    = \sqrt{\vec{F}_1+ \cdots + \vec{F}_N}
+    \nonumber
+  \end{equation}
+\end{varwidth}
+\end{document}
--- a/doc/src/min_modify.txt
+++ b/doc/src/min_modify.txt
@ -13,11 +13,15 @@ min_modify command :h3
 min_modify keyword values ... :pre

 one or more keyword/value pairs may be listed :ulb,l
-keyword = {dmax} or {line} or {alpha_damp} or {discrete_factor}
+keyword = {dmax} or {line} or {norm} or {alpha_damp} or {discrete_factor}
  {dmax} value = max
    max = maximum distance for line search to move (distance units)
-  {line} value = {backtrack} or {quadratic} or {forcezero}
-    backtrack,quadratic,forcezero = style of linesearch to use
+  {line} value = {backtrack} or {quadratic} or {forcezero} or {spin_cubic} or {spin_none}
+    backtrack,quadratic,forcezero,spin_cubic,spin_none = style of linesearch to use 
+  {norm} value = {two} or {max}
+    two = Euclidean two-norm (length of 3N vector)
+    inf = max force component across all 3-vectors
+    max = max force norm across all 3-vectors
  {alpha_damp} value = damping
    damping = fictitious Gilbert damping for spin minimization (adim)
  {discrete_factor} value = factor
@ -69,6 +73,28 @@ difference of two large values (energy before and energy after) and
 that difference may be smaller than machine epsilon even if atoms
 could move in the gradient direction to reduce forces further.

+The choice of a norm can be modified for the min styles {cg}, {sd}, 
+{quickmin}, {fire}, {spin}, {spin/cg} and {spin/lbfgs} using 
+the {norm} keyword.
+The default {two} norm computes the 2-norm (Euclidean length) of the
+global force vector:
+
+:c,image(Eqs/norm_two.jpg) 
+
+The {max} norm computes the length of the 3-vector force 
+for each atom  (2-norm), and takes the maximum value of those across 
+all atoms
+
+:c,image(Eqs/norm_max.jpg) 
+
+The {inf} norm takes the maximum component across the forces of
+all atoms in the system: 
+
+:c,image(Eqs/norm_inf.jpg) 
+
+For the min styles {spin}, {spin/cg} and {spin/lbfgs}, the force
+norm is replaced by the spin-torque norm.
+
 Keywords {alpha_damp} and {discrete_factor} only make sense when
 a "min_spin"_min_spin.html command is declared.
 Keyword {alpha_damp} defines an analog of a magnetic Gilbert
@ -77,10 +103,24 @@ a given magnetic system.
 Keyword {discrete_factor} defines a discretization factor for the
 adaptive timestep used in the {spin} minimization.
 See "min_spin"_min_spin.html for more information about those
-quantities.
-Default values are {alpha_damp} = 1.0 and {discrete_factor} = 10.0.
+quantities. 

-[Restrictions:] none
+The choice of a line search algorithm for the {spin/cg} and
+{spin/lbfgs} styles can be specified via the {line} keyword.
+The {spin_cubic} and  {spin_none} only make sense when one of those 
+two minimization styles is declared.
+The {spin_cubic} performs the line search based on a cubic interpolation
+of the energy along the search direction. The {spin_none} keyword
+deactivates the line search procedure.
+The {spin_none} is a default value for {line} keyword for both {spin/lbfgs}
+and {spin/cg}. Convergence of {spin/lbfgs} can be more robust if
+{spin_cubic} line search is used.
+
+[Restrictions:] 
+
+For magnetic GNEB calculations, only {spin_none} value for {line} keyword can be used
+when styles {spin/cg} and {spin/lbfgs} are employed.
+See "neb/spin"_neb_spin.html for more explanation.

 [Related commands:]

@ -88,4 +128,8 @@ Default values are {alpha_damp} = 1.0 and {discrete_factor} = 10.0.

 [Default:]

-The option defaults are dmax = 0.1 and line = quadratic.
+The option defaults are dmax = 0.1, line = quadratic and norm = two.
+
+For the {spin}, {spin/cg} and {spin/lbfgs} styles, the
+option defaults are alpha_damp = 1.0, discrete_factor = 10.0,
+line = spin_none, and norm = euclidean.
--- a/doc/src/min_spin.txt
+++ b/doc/src/min_spin.txt
@ -6,14 +6,19 @@
 :line

 min_style spin command :h3
+min_style spin/cg command :h3
+min_style spin/lbfgs command :h3

 [Syntax:]

-min_style spin :pre
+min_style spin 
+min_style spin/cg 
+min_style spin/lbfgs :pre

 [Examples:]

-min_style       spin :pre
+min_style 	spin/lbfgs 
+min_modify      line spin_cubic discrete_factor 10.0 :pre

 [Description:]

@ -46,10 +51,39 @@ definition of this timestep.
 {discrete_factor} can be defined with the "min_modify"_min_modify.html
 command.

-NOTE: The {spin} style replaces the force tolerance by a torque
-tolerance. See "minimize"_minimize.html for more explanation.
+Style {spin/cg} defines an orthogonal spin optimization
+(OSO) combined to a conjugate gradient (CG) algorithm. 
+The "min_modify"_min_modify.html command can be used to
+couple the {spin/cg} to a line search procedure, and to modify the 
+discretization factor {discrete_factor}.
+By default, style {spin/cg} does not employ the line search procedure 
+and uses the adaptive time-step technique in the same way as style {spin}.

-[Restrictions:]
+Style {spin/lbfgs} defines an orthogonal spin optimization
+(OSO) combined to a limited-memory Broyden-Fletcher-Goldfarb-Shanno 
+(L-BFGS) algorithm.
+By default, style {spin/lbfgs} does not employ line search procedure.
+If the line search procedure is not used then the discrete factor defines
+the maximum root mean squared rotation angle of spins by equation {pi/(5*Kappa)}.
+The default value for Kappa is 10.
+The {spin_cubic} line search can improve the convergence of the 
+{spin/lbfgs} algorithm.
+
+The "min_modify"_min_modify.html command can be used to
+activate the line search procedure, and to modify the
+discretization factor {discrete_factor}. 
+
+For more information about styles {spin/cg} and {spin/lbfgs}, 
+see their implementation reported in "(Ivanov)"_#Ivanov1.
+
+NOTE: All the {spin} styles replace the force tolerance by a torque
+tolerance. See "minimize"_minimize.html for more explanation. 
+
+NOTE: The {spin/cg} and {spin/lbfgs} styles can be used
+for magnetic NEB calculations only if the line search procedure
+is deactivated. See "neb/spin"_neb_spin.html for more explanation. 
+
+[Restrictions:] 

 This minimization procedure is only applied to spin degrees of
 freedom for a frozen lattice configuration.
@ -61,5 +95,10 @@ freedom for a frozen lattice configuration.

 [Default:]

-The option defaults are {alpha_damp} = 1.0 and {discrete_factor} =
-10.0.
+The option defaults are {alpha_damp} = 1.0, {discrete_factor} =
+10.0, {line} = spin_none and {norm} = euclidean.
+
+:line
+
+:link(Ivanov1)
+[(Ivanov)] Ivanov, Uzdin, Jonsson. arXiv preprint arXiv:1904.02669, (2019).
--- a/doc/src/min_style.txt
+++ b/doc/src/min_style.txt
@ -11,7 +11,7 @@ min_style command :h3

 min_style style :pre

-style = {cg} or {cg/kk} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul
+style = {cg} or {hftn} or {sd} or {quickmin} or {fire} or {spin} or {spin/cg} or {spin/lbfgs} :ul

 [Examples:]

@ -64,11 +64,25 @@ a minimization.

 Style {spin} is a damped spin dynamics with an adaptive
 timestep.
-See the "min/spin"_min_spin.html doc page for more information.
+
+Style {spin/cg} uses an orthogonal spin optimization (OSO)
+combined to a conjugate gradient (CG) approach to minimize spin
+configurations. 
+
+Style {spin/lbfgs} uses an orthogonal spin optimization (OSO)
+combined to a limited-memory Broyden-Fletcher-Goldfarb-Shanno
+(LBFGS) approach to minimize spin configurations. 
+
+See the "min/spin"_min_spin.html doc page for more information
+about the {spin}, {spin/cg} and {spin/lbfgs} styles.

 Either the {quickmin} and {fire} styles are useful in the context of
 nudged elastic band (NEB) calculations via the "neb"_neb.html command.

+Either the {spin}, {spin/cg} and {spin/lbfgs} styles are useful 
+in the context of magnetic geodesic nudged elastic band (GNEB) calculations 
+via the "neb/spin"_neb_spin.html command.
+
 NOTE: The damped dynamic minimizers use whatever timestep you have
 defined via the "timestep"_timestep.html command.  Often they will
 converge more quickly if you use a timestep about 10x larger than you
--- a/doc/src/minimize.txt
+++ b/doc/src/minimize.txt
@ -104,12 +104,13 @@ the line search fails because the step distance backtracks to 0.0
 the number of outer iterations or timesteps exceeds {maxiter}
 the number of total force evaluations exceeds {maxeval} :ul

-NOTE: the "minimization style"_min_style.html {spin} replaces
+NOTE: the "minimization style"_min_style.html {spin},
+{spin/cg}, and {spin/lbfgs} replace
 the force tolerance {ftol} by a torque tolerance.
-The minimization procedure stops if the 2-norm (length) of the
-global torque vector (defined as the cross product between the
-spins and their precession vectors omega) is less than {ftol},
-or if any of the other criteria are met.
+The minimization procedure stops if the 2-norm (length) of the torque vector on atom
+(defined as the cross product between the
+atomic spin and its precession vectors omega) is less than {ftol},
+or if any of the other criteria are met. Torque have the same units as the energy.

 NOTE: You can also use the "fix halt"_fix_halt.html command to specify
 a general criterion for exiting a minimization, that is a calculation
--- a/doc/src/neb_spin.txt
+++ b/doc/src/neb_spin.txt
@ -59,9 +59,9 @@ performance speed-up you would see with one or more physical
 processors per replica.  See the "Howto replica"_Howto_replica.html
 doc page for further discussion.

-NOTE: As explained below, a GNEB calculation performs a damped dynamics
-minimization across all the replicas.  The "spin"_min_spin.html
-style minimizer has to be defined in your input script.
+NOTE: As explained below, a GNEB calculation performs a
+minimization across all the replicas. One of the "spin"_min_spin.html
+style minimizers has to be defined in your input script. 

 When a GNEB calculation is performed, it is assumed that each replica
 is running the same system, though LAMMPS does not check for this.
@ -170,10 +170,11 @@ command is issued.
 :line

 A NEB calculation proceeds in two stages, each of which is a
-minimization procedure, performed via damped dynamics.  To enable
-this, you must first define a damped spin dynamics
-"min_style"_min_style.html, using the {spin} style (see
-"min_spin"_min_spin.html for more information).
+minimization procedure.  To enable
+this, you must first define a
+"min_style"_min_style.html, using either the {spin},
+{spin/cg}, or {spin/lbfgs} style (see
+"min_spin"_min_spin.html for more information).  
 The other styles cannot be used, since they relax the lattice
 degrees of freedom instead of the spins.

@ -358,6 +359,9 @@ This command can only be used if LAMMPS was built with the SPIN
 package.  See the "Build package"_Build_package.html doc
 page for more info.

+For magnetic GNEB calculations, only {spin_none} value for {line} keyword can be used
+when styles {spin/cg} and {spin/lbfgs} are employed.
+
 :line

 [Related commands:]
--- a/doc/src/pair_spin_dipole.txt
+++ b/doc/src/pair_spin_dipole.txt
@ -25,9 +25,8 @@ pair_coeff * * 10.0
 pair_coeff 2 3 8.0 :pre

 pair_style spin/dipole/long 9.0
-pair_coeff * * 1.0 1.0
-pair_coeff 2 3 1.0 1.0 2.5 4.0 scale 0.5
-pair_coeff 2 3 1.0 1.0 2.5 4.0 :pre
+pair_coeff * * 10.0
+pair_coeff 2 3 6.0 :pre

 [Description:]

--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -275,6 +275,7 @@ Broadwell
 Broglie
 brownian
 brownw
+Broyden
 Bryantsev
 Btarget
 btype
@ -988,6 +989,7 @@ gmask
 Gmask
 gneb
 GNEB
+Goldfarb
 googlemail
 Gordan
 GPa
@ -1405,6 +1407,7 @@ Laupretre
 lavenderblush
 lawngreen
 lB
+lbfgs
 lbl
 LBtype
 lcbop
@ -2041,6 +2044,7 @@ Orsi
 ortho
 orthonormal
 orthorhombic
+oso
 ot
 Otype
 Ouldridge
@ -2514,6 +2518,7 @@ setvel
 sfftw
 Sg
 Shan
+Shanno
 shapex
 shapey
 shapez
--- a/examples/SPIN/spinmin/in.spinmin_cg.bfo
+++ b/examples/SPIN/spinmin/in.spinmin_cg.bfo
@ -0,0 +1,54 @@
+# bfo in a 3d periodic box
+
+units 		metal
+dimension 	3
+boundary 	p p f
+atom_style 	spin
+
+# necessary for the serial algorithm (sametag)
+atom_modify 	map array 
+
+lattice 	sc 3.96
+region 		box block 0.0 34.0 0.0 34.0 0.0 1.0
+create_box 	1 box
+create_atoms 	1 box
+
+# setting mass, mag. moments, and interactions for bcc iron
+
+mass		1 1.0
+set 		group all spin/random 11 2.50
+
+pair_style 	hybrid/overlay spin/exchange 6.0 spin/magelec 4.5 spin/dmi 4.5
+pair_coeff 	* * spin/exchange exchange 6.0 -0.01575 0.0 1.965
+# pair_coeff 	* * spin/magelec magelec 4.5 0.000109 1.0 1.0 1.0
+pair_coeff 	* * spin/magelec magelec 4.5 0.00109 1.0 1.0 1.0
+pair_coeff 	* * spin/dmi dmi 4.5 0.00005 1.0 1.0 1.0
+
+neighbor 	0.1 bin
+neigh_modify 	every 10 check yes delay 20
+
+fix 		1 all precession/spin anisotropy 0.0000033 0.0 0.0 1.0
+fix_modify	1 energy yes
+
+timestep	0.0001
+
+compute 	out_mag    all spin
+compute 	out_pe     all pe
+compute 	out_ke     all ke
+compute 	out_temp   all temp
+
+variable 	magz      equal c_out_mag[3]
+variable 	magnorm   equal c_out_mag[4]
+variable 	emag      equal c_out_mag[5]
+variable 	tmag      equal c_out_mag[6]
+
+thermo          100
+thermo_style    custom step time v_magnorm v_emag v_tmag temp etotal
+thermo_modify   format float %20.15g
+
+compute 	outsp all property/atom spx spy spz sp fmx fmy fmz
+dump 		1 all custom 50 dump.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] c_outsp[4] c_outsp[5] c_outsp[6] c_outsp[7]
+
+min_style	spin/cg
+# min_modify 	line spin_none discrete_factor 10.0
+minimize        1.0e-10 1.0e-10 10000 10000
--- a/examples/SPIN/spinmin/in.spinmin_lbfgs.bfo
+++ b/examples/SPIN/spinmin/in.spinmin_lbfgs.bfo
@ -0,0 +1,55 @@
+# bfo in a 3d periodic box
+
+units 		metal
+dimension 	3
+boundary 	p p f
+atom_style 	spin
+
+# necessary for the serial algorithm (sametag)
+atom_modify 	map array 
+
+lattice 	sc 3.96
+region 		box block 0.0 34.0 0.0 34.0 0.0 1.0
+create_box 	1 box
+create_atoms 	1 box
+
+# setting mass, mag. moments, and interactions for bcc iron
+
+mass		1 1.0
+set 		group all spin/random 11 2.50
+
+pair_style 	hybrid/overlay spin/exchange 6.0 spin/magelec 4.5 spin/dmi 4.5
+pair_coeff 	* * spin/exchange exchange 6.0 -0.01575 0.0 1.965
+#pair_coeff 	* * spin/magelec magelec 4.5 0.000109 1.0 1.0 1.0
+pair_coeff 	* * spin/magelec magelec 4.5 0.00109 1.0 1.0 1.0
+pair_coeff 	* * spin/dmi dmi 4.5 0.00005 1.0 1.0 1.0
+
+neighbor 	0.1 bin
+neigh_modify 	every 10 check yes delay 20
+
+fix 		1 all precession/spin anisotropy 0.0000033 0.0 0.0 1.0
+fix_modify	1 energy yes
+
+timestep	0.0001
+
+compute 	out_mag    all spin
+compute 	out_pe     all pe
+compute 	out_ke     all ke
+compute 	out_temp   all temp
+
+variable 	magz      equal c_out_mag[3]
+variable 	magnorm   equal c_out_mag[4]
+variable 	emag      equal c_out_mag[5]
+variable 	tmag      equal c_out_mag[6]
+
+thermo          50
+thermo_style    custom step time v_magnorm v_emag v_tmag temp etotal
+thermo_modify   format float %20.15g
+
+compute 	outsp all property/atom spx spy spz sp fmx fmy fmz
+dump 		1 all custom 50 dump.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] c_outsp[4] c_outsp[5] c_outsp[6] c_outsp[7]
+
+min_style	spin/lbfgs
+# min_modify 	line spin_cubic discrete_factor 10.0
+min_modify	norm max
+minimize        1.0e-15 1.0e-10 10000 1000
--- a/src/.gitignore
+++ b/src/.gitignore
@ -161,6 +161,10 @@
 /fix_setforce_spin.h
 /min_spin.cpp
 /min_spin.h
+/min_spin_cg.cpp
+/min_spin_cg.h
+/min_spin_lbfgs.cpp
+/min_spin_lbfgs.h
 /neb_spin.cpp
 /neb_spin.h
 /pair_spin.cpp
--- a/src/SPIN/fix_nve_spin.h
+++ b/src/SPIN/fix_nve_spin.h
@ -52,7 +52,7 @@ friend class PairSpin;

  double dtv, dtf, dts;			// velocity, force, and spin timesteps

-  int nlocal_max;			// max value of nlocal (for lists size)
+  int nlocal_max;			// max value of nlocal (for size of lists)

  int pair_spin_flag;			// magnetic pair flags
  int long_spin_flag;			// magnetic long-range flag
--- a/src/SPIN/min_spin.cpp
+++ b/src/SPIN/min_spin.cpp
@ -41,15 +41,15 @@ using namespace MathConst;

 /* ---------------------------------------------------------------------- */

-MinSpin::MinSpin(LAMMPS *lmp) : Min(lmp) {}
+MinSpin::MinSpin(LAMMPS *lmp) : Min(lmp) {
+  alpha_damp = 1.0;
+  discrete_factor = 10.0;
+}

 /* ---------------------------------------------------------------------- */

 void MinSpin::init()
 {
-  alpha_damp = 1.0;
-  discrete_factor = 10.0;
-
  Min::init();

  dts = dt = update->dt;
@ -77,12 +77,12 @@ void MinSpin::setup_style()
 int MinSpin::modify_param(int narg, char **arg)
 {
  if (strcmp(arg[0],"alpha_damp") == 0) {
-    if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
+    if (narg < 2) error->all(FLERR,"Illegal min_modify command");
    alpha_damp = force->numeric(FLERR,arg[1]);
    return 2;
  }
  if (strcmp(arg[0],"discrete_factor") == 0) {
-    if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
+    if (narg < 2) error->all(FLERR,"Illegal min_modify command");
    discrete_factor = force->numeric(FLERR,arg[1]);
    return 2;
  }
@ -116,7 +116,7 @@ void MinSpin::reset_vectors()
 int MinSpin::iterate(int maxiter)
 {
  bigint ntimestep;
-  double fmdotfm;
+  double fmdotfm,fmsq;
  int flag,flagall;

  for (int iter = 0; iter < maxiter; iter++) {
@ -130,7 +130,7 @@ int MinSpin::iterate(int maxiter)
    // optimize timestep accross processes / replicas
    // need a force calculation for timestep optimization

-    energy_force(0);
+    if (iter == 0) energy_force(0);
    dts = evaluate_dt();

    // apply damped precessional dynamics to the spins
@ -163,8 +163,13 @@ int MinSpin::iterate(int maxiter)
    // magnetic torque tolerance criterion
    // sync across replicas if running multi-replica minimization

+    fmdotfm = fmsq = 0.0;
    if (update->ftol > 0.0) {
-      fmdotfm = fmnorm_sqr();
+      if (normstyle == MAX) fmsq = max_torque();	// max torque norm
+      else if (normstyle == INF) fmsq = inf_torque();	// inf torque norm
+      else if (normstyle == TWO) fmsq = total_torque();	// Euclidean torque 2-norm
+      else error->all(FLERR,"Illegal min_modify command");
+      fmdotfm = fmsq*fmsq;
      if (update->multireplica == 0) {
        if (fmdotfm < update->ftol*update->ftol) return FTOL;
      } else {
@ -242,7 +247,7 @@ void MinSpin::advance_spins(double dts)
  double **sp = atom->sp;
  double **fm = atom->fm;
  double tdampx,tdampy,tdampz;
-  double msq,scale,fm2,energy,dts2;
+  double fm2,energy,dts2;
  double cp[3],g[3];

  dts2 = dts*dts;
@ -288,37 +293,3 @@ void MinSpin::advance_spins(double dts)
    // because no need for simplecticity
  }
 }
-
-/* ----------------------------------------------------------------------
-   compute and return ||mag. torque||_2^2
------------------------------------------------------------------------- */
-
-double MinSpin::fmnorm_sqr()
-{
-  int nlocal = atom->nlocal;
-  double tx,ty,tz;
-  double **sp = atom->sp;
-  double **fm = atom->fm;
-
-  // calc. magnetic torques
-
-  double local_norm2_sqr = 0.0;
-  for (int i = 0; i < nlocal; i++) {
-    tx = (fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1]);
-    ty = (fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2]);
-    tz = (fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0]);
-
-    local_norm2_sqr += tx*tx + ty*ty + tz*tz;
-  }
-
-  // no extra atom calc. for spins
-
-  if (nextra_atom)
-    error->all(FLERR,"extra atom option not available yet");
-
-  double norm2_sqr = 0.0;
-  MPI_Allreduce(&local_norm2_sqr,&norm2_sqr,1,MPI_DOUBLE,MPI_SUM,world);
-
-  return norm2_sqr;
-}
-
--- a/src/SPIN/min_spin.h
+++ b/src/SPIN/min_spin.h
@ -35,7 +35,6 @@ class MinSpin : public Min {
  int iterate(int);
  double evaluate_dt();
  void advance_spins(double);
-  double fmnorm_sqr();

 private:

--- a/src/SPIN/min_spin_cg.cpp
+++ b/src/SPIN/min_spin_cg.cpp
@ -0,0 +1,649 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ------------------------------------------------------------------------
+   Contributing authors: Aleksei Ivanov (University of Iceland)
+                         Julien Tranchida (SNL)
+
+   Please cite the related publication:
+   Ivanov, A. V., Uzdin, V. M., & Jónsson, H. (2019). Fast and Robust 
+   Algorithm for the Minimisation of the Energy of Spin Systems. arXiv 
+   preprint arXiv:1904.02669.
+------------------------------------------------------------------------- */
+
+#include <mpi.h>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include "min_spin_cg.h"
+#include "universe.h"
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "output.h"
+#include "timer.h"
+#include "error.h"
+#include "memory.h"
+#include "modify.h"
+#include "math_special.h"
+#include "math_const.h"
+#include "universe.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+static const char cite_minstyle_spin_cg[] =
+  "min_style spin/cg command:\n\n"
+  "@article{ivanov2019fast,\n"
+  "title={Fast and Robust Algorithm for the Minimisation of the Energy of "
+  "Spin Systems},\n"
+  "author={Ivanov, A. V and Uzdin, V. M. and J{\'o}nsson, H.},\n"
+  "journal={arXiv preprint arXiv:1904.02669},\n"
+  "year={2019}\n"
+  "}\n\n";
+
+// EPS_ENERGY = minimum normalization for energy tolerance
+
+#define EPS_ENERGY 1.0e-8
+
+#define DELAYSTEP 5
+
+/* ---------------------------------------------------------------------- */
+
+MinSpinCG::MinSpinCG(LAMMPS *lmp) :
+  Min(lmp), g_old(NULL), g_cur(NULL), p_s(NULL), sp_copy(NULL)
+{
+  if (lmp->citeme) lmp->citeme->add(cite_minstyle_spin_cg);
+  nlocal_max = 0;
+
+  // nreplica = number of partitions
+  // ireplica = which world I am in universe
+
+  nreplica = universe->nworlds;
+  ireplica = universe->iworld;
+  use_line_search = 0;  // no line search as default option for CG
+
+  discrete_factor = 10.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+MinSpinCG::~MinSpinCG()
+{
+  memory->destroy(g_old);
+  memory->destroy(g_cur);
+  memory->destroy(p_s);
+  if (use_line_search)
+    memory->destroy(sp_copy);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinSpinCG::init()
+{
+  local_iter = 0;
+  der_e_cur = 0.0;
+  der_e_pr = 0.0;
+
+  Min::init();
+
+  // warning if line_search combined to gneb
+
+  if ((nreplica >= 1) && (linestyle != 4) && (comm->me == 0))
+    error->warning(FLERR,"Line search incompatible gneb");
+
+  // set back use_line_search to 0 if more than one replica
+  
+  if (linestyle == 3 && nreplica == 1){
+    use_line_search = 1;
+  }
+  else{
+    use_line_search = 0;
+  }
+
+  dts = dt = update->dt;
+  last_negative = update->ntimestep;
+
+  // allocate tables
+
+  nlocal_max = atom->nlocal;
+  memory->grow(g_old,3*nlocal_max,"min/spin/cg:g_old");
+  memory->grow(g_cur,3*nlocal_max,"min/spin/cg:g_cur");
+  memory->grow(p_s,3*nlocal_max,"min/spin/cg:p_s");
+  if (use_line_search)
+    memory->grow(sp_copy,nlocal_max,3,"min/spin/cg:sp_copy");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinSpinCG::setup_style()
+{
+  double **v = atom->v;
+  int nlocal = atom->nlocal;
+
+  // check if the atom/spin style is defined
+
+  if (!atom->sp_flag)
+    error->all(FLERR,"min spin/cg requires atom/spin style");
+
+  for (int i = 0; i < nlocal; i++)
+    v[i][0] = v[i][1] = v[i][2] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int MinSpinCG::modify_param(int narg, char **arg)
+{
+  if (strcmp(arg[0],"discrete_factor") == 0) {
+    if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
+    discrete_factor = force->numeric(FLERR,arg[1]);
+    return 2;
+  }
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+   set current vector lengths and pointers
+   called after atoms have migrated
+------------------------------------------------------------------------- */
+
+void MinSpinCG::reset_vectors()
+{
+  // atomic dof
+
+  // size sp is 4N vector
+  nvec = 4 * atom->nlocal;
+  if (nvec) spvec = atom->sp[0];
+
+  nvec = 3 * atom->nlocal;
+  if (nvec) fmvec = atom->fm[0];
+
+  if (nvec) xvec = atom->x[0];
+  if (nvec) fvec = atom->f[0];
+}
+
+/* ----------------------------------------------------------------------
+   minimization via orthogonal spin optimisation
+------------------------------------------------------------------------- */
+
+int MinSpinCG::iterate(int maxiter)
+{
+  int nlocal = atom->nlocal;
+  bigint ntimestep;
+  double fmdotfm,fmsq;
+  int flag, flagall;
+  double **sp = atom->sp;
+  double der_e_cur_tmp = 0.0;
+
+  if (nlocal_max < nlocal) {
+    local_iter = 0;
+    nlocal_max = nlocal;
+    memory->grow(g_old,3*nlocal_max,"min/spin/cg:g_old");
+    memory->grow(g_cur,3*nlocal_max,"min/spin/cg:g_cur");
+    memory->grow(p_s,3*nlocal_max,"min/spin/cg:p_s");
+    if (use_line_search)
+      memory->grow(sp_copy,nlocal_max,3,"min/spin/cg:sp_copy");
+  }
+
+  for (int iter = 0; iter < maxiter; iter++) {
+
+    if (timer->check_timeout(niter))
+      return TIMEOUT;
+  
+    ntimestep = ++update->ntimestep;
+    niter++;
+  
+    // optimize timestep accross processes / replicas
+    // need a force calculation for timestep optimization
+
+    if (use_line_search) {
+
+      // here we need to do line search
+      if (local_iter == 0){
+        calc_gradient();
+      }
+
+      calc_search_direction();
+      der_e_cur = 0.0;
+      for (int i = 0; i < 3 * nlocal; i++)
+        der_e_cur += g_cur[i] * p_s[i];
+      MPI_Allreduce(&der_e_cur,&der_e_cur_tmp,1,MPI_DOUBLE,MPI_SUM,world);
+      der_e_cur = der_e_cur_tmp;
+      if (update->multireplica == 1) {
+        MPI_Allreduce(&der_e_cur_tmp,&der_e_cur,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+      }
+      for (int i = 0; i < nlocal; i++)
+        for (int j = 0; j < 3; j++)
+          sp_copy[i][j] = sp[i][j];
+
+      eprevious = ecurrent;
+      der_e_pr = der_e_cur;
+      calc_and_make_step(0.0, 1.0, 0);
+    }
+    else{
+
+      // here we don't do line search
+      // if gneb calc., nreplica > 1
+      // then calculate gradients and advance spins
+      // of intermediate replicas only
+      calc_gradient();
+      calc_search_direction();
+      advance_spins();
+      neval++;
+      eprevious = ecurrent;
+      ecurrent = energy_force(0);
+    }
+
+    // energy tolerance criterion
+    // only check after DELAYSTEP elapsed since velocties reset to 0
+    // sync across replicas if running multi-replica minimization
+  
+    if (update->etol > 0.0 && ntimestep-last_negative > DELAYSTEP) {
+      if (update->multireplica == 0) {
+        if (fabs(ecurrent-eprevious) <
+            update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
+          return ETOL;
+      } else {
+        if (fabs(ecurrent-eprevious) <
+            update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
+          flag = 0;
+        else flag = 1;
+        MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,universe->uworld);
+        if (flagall == 0) return ETOL;
+      }
+    }
+
+    // magnetic torque tolerance criterion
+    // sync across replicas if running multi-replica minimization
+
+    fmdotfm = fmsq = 0.0;
+    if (update->ftol > 0.0) {
+      if (normstyle == MAX) fmsq = max_torque();	// max torque norm
+      else if (normstyle == INF) fmsq = inf_torque();	// inf torque norm
+      else if (normstyle == TWO) fmsq = total_torque();	// Euclidean torque 2-norm
+      else error->all(FLERR,"Illegal min_modify command");
+      fmdotfm = fmsq*fmsq;
+      if (update->multireplica == 0) {
+        if (fmdotfm < update->ftol*update->ftol) return FTOL;
+      } else {
+        if (fmdotfm < update->ftol*update->ftol) flag = 0;
+        else flag = 1;
+        MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,universe->uworld);
+        if (flagall == 0) return FTOL;
+      }
+    }
+
+    // output for thermo, dump, restart files
+
+    if (output->next == ntimestep) {
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(Timer::OUTPUT);
+    }
+  }
+
+  return MAXITER;
+}
+
+/* ----------------------------------------------------------------------
+   calculate gradients
+---------------------------------------------------------------------- */
+
+void MinSpinCG::calc_gradient()
+{
+  int nlocal = atom->nlocal;
+  double **sp = atom->sp;
+  double **fm = atom->fm;
+  double hbar = force->hplanck/MY_2PI;
+  double factor;
+
+  if (use_line_search)
+    factor = hbar;
+  else factor = evaluate_dt();
+
+  // loop on all spins on proc.
+
+  for (int i = 0; i < nlocal; i++) {
+    g_cur[3 * i + 0] = (fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0]) * factor;
+    g_cur[3 * i + 1] = -(fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2]) * factor;
+    g_cur[3 * i + 2] = (fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1]) * factor;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   search direction:
+   The Fletcher-Reeves conj. grad. method
+   See Jorge Nocedal and Stephen J. Wright 'Numerical
+   Optimization' Second Edition, 2006 (p. 121)
+---------------------------------------------------------------------- */
+
+void MinSpinCG::calc_search_direction()
+{
+  int nlocal = atom->nlocal;
+  double g2old = 0.0;
+  double g2 = 0.0;
+  double beta = 0.0;
+
+  double g2_global = 0.0;
+  double g2old_global = 0.0;
+
+  double factor = 1.0;
+
+  // for multiple replica do not move end points
+  if (nreplica > 1)
+    if (ireplica == 0 || ireplica == nreplica - 1)
+      factor = 0.0;
+
+
+  if (local_iter == 0 || local_iter % 5 == 0){ 	// steepest descent direction
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = -g_cur[i] * factor;
+      g_old[i] = g_cur[i] * factor;
+    }
+  } else { 				// conjugate direction
+    for (int i = 0; i < 3 * nlocal; i++) {
+      g2old += g_old[i] * g_old[i];
+      g2 += g_cur[i] * g_cur[i];
+    }
+
+    // now we need to collect/broadcast beta on this replica
+    // need to check what is beta for GNEB
+
+    MPI_Allreduce(&g2,&g2_global,1,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&g2old,&g2old_global,1,MPI_DOUBLE,MPI_SUM,world);
+
+    // Sum over all replicas. Good for GNEB.
+    
+    if (nreplica > 1) {
+      g2 = g2_global * factor;
+      g2old = g2old_global * factor;
+      MPI_Allreduce(&g2,&g2_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+      MPI_Allreduce(&g2old,&g2old_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+    }
+    if (fabs(g2_global) < 1.0e-60) beta = 0.0;
+    else beta = g2_global / g2old_global;
+    
+    // calculate conjugate direction
+    
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = (beta * p_s[i] - g_cur[i]) * factor;
+      g_old[i] = g_cur[i] * factor;
+    }
+  }
+
+  local_iter++;
+}
+
+/* ----------------------------------------------------------------------
+   rotation of spins along the search direction
+---------------------------------------------------------------------- */
+
+void MinSpinCG::advance_spins()
+{
+  int nlocal = atom->nlocal;
+  double **sp = atom->sp;
+  double rot_mat[9];	// exponential of matrix made of search direction
+  double s_new[3];
+
+  // loop on all spins on proc.
+
+  for (int i = 0; i < nlocal; i++) {
+    rodrigues_rotation(p_s + 3 * i, rot_mat);
+    
+    // rotate spins
+    
+    vm3(rot_mat, sp[i], s_new);
+    for (int j = 0; j < 3; j++) sp[i][j] = s_new[j];
+  }
+}
+
+/* ----------------------------------------------------------------------
+  calculate 3x3 matrix exponential using Rodrigues' formula
+  (R. Murray, Z. Li, and S. Shankar Sastry,
+  A Mathematical Introduction to
+  Robotic Manipulation (1994), p. 28 and 30).
+  
+  upp_tr - vector x, y, z so that one calculate
+  U = exp(A) with A= [[0, x, y],
+                      [-x, 0, z],
+                      [-y, -z, 0]]
+------------------------------------------------------------------------- */
+
+void MinSpinCG::rodrigues_rotation(const double *upp_tr, double *out)
+{
+  double theta,A,B,D,x,y,z;
+  double s1,s2,s3,a1,a2,a3;
+
+  if (fabs(upp_tr[0]) < 1.0e-40 &&
+      fabs(upp_tr[1]) < 1.0e-40 &&
+      fabs(upp_tr[2]) < 1.0e-40){
+
+    // if upp_tr is zero, return unity matrix
+    
+    for(int k = 0; k < 3; k++){
+      for(int m = 0; m < 3; m++){
+        if (m == k) out[3 * k + m] = 1.0;
+        else out[3 * k + m] = 0.0;
+      }
+    }
+    return;
+  }
+
+  theta = sqrt(upp_tr[0] * upp_tr[0] +
+               upp_tr[1] * upp_tr[1] +
+               upp_tr[2] * upp_tr[2]);
+
+  A = cos(theta);
+  B = sin(theta);
+  D = 1.0 - A;
+  x = upp_tr[0]/theta;
+  y = upp_tr[1]/theta;
+  z = upp_tr[2]/theta;
+
+  // diagonal elements of U
+
+  out[0] = A + z * z * D;
+  out[4] = A + y * y * D;
+  out[8] = A + x * x * D;
+
+  // off diagonal of U
+
+  s1 = -y * z *D;
+  s2 = x * z * D;
+  s3 = -x * y * D;
+
+  a1 = x * B;
+  a2 = y * B;
+  a3 = z * B;
+
+  out[1] = s1 + a1;
+  out[3] = s1 - a1;
+  out[2] = s2 + a2;
+  out[6] = s2 - a2;
+  out[5] = s3 + a3;
+  out[7] = s3 - a3;
+
+}
+
+/* ----------------------------------------------------------------------
+  out = vector^T x m,
+  m -- 3x3 matrix , v -- 3-d vector
+------------------------------------------------------------------------- */
+
+void MinSpinCG::vm3(const double *m, const double *v, double *out)
+{
+  for(int i = 0; i < 3; i++){
+    out[i] = 0.0;
+    for(int j = 0; j < 3; j++) out[i] += *(m + 3 * j + i) * v[j];
+  }
+}
+
+/* ----------------------------------------------------------------------
+  advance spins
+------------------------------------------------------------------------- */
+
+void MinSpinCG::make_step(double c, double *energy_and_der)
+{
+  double p_scaled[3];
+  int nlocal = atom->nlocal;
+  double rot_mat[9]; // exponential of matrix made of search direction
+  double s_new[3];
+  double **sp = atom->sp;
+  double der_e_cur_tmp = 0.0;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // scale the search direction
+
+    for (int j = 0; j < 3; j++) p_scaled[j] = c * p_s[3 * i + j];
+
+    // calculate rotation matrix
+
+    rodrigues_rotation(p_scaled, rot_mat);
+
+    // rotate spins
+
+    vm3(rot_mat, sp[i], s_new);
+    for (int j = 0; j < 3; j++) sp[i][j] = s_new[j];
+  }
+
+  ecurrent = energy_force(0);
+  calc_gradient();
+  neval++;
+  der_e_cur = 0.0;
+  for (int i = 0; i < 3 * nlocal; i++) {
+    der_e_cur += g_cur[i] * p_s[i];
+  }
+  MPI_Allreduce(&der_e_cur,&der_e_cur_tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  der_e_cur = der_e_cur_tmp;
+  if (update->multireplica == 1) {
+    MPI_Allreduce(&der_e_cur_tmp,&der_e_cur,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+  }
+
+  energy_and_der[0] = ecurrent;
+  energy_and_der[1] = der_e_cur;
+}
+
+/* ----------------------------------------------------------------------
+  Calculate step length which satisfies approximate Wolfe conditions
+  using the cubic interpolation
+------------------------------------------------------------------------- */
+
+int MinSpinCG::calc_and_make_step(double a, double b, int index)
+{
+  double e_and_d[2] = {0.0,0.0};
+  double alpha,c1,c2,c3;
+  double **sp = atom->sp;
+  int nlocal = atom->nlocal;
+
+  make_step(b,e_and_d);
+  ecurrent = e_and_d[0];
+  der_e_cur = e_and_d[1];
+  index++;
+
+  if (adescent(eprevious,e_and_d[0]) || index == 5){
+    MPI_Bcast(&b,1,MPI_DOUBLE,0,world);
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = b * p_s[i];
+    }
+    return 1;
+  }
+  else {
+    double r,f0,f1,df0,df1;
+    r = b - a;
+    f0 = eprevious;
+    f1 = ecurrent;
+    df0 = der_e_pr;
+    df1 = der_e_cur;
+
+    c1 = -2.0*(f1-f0)/(r*r*r)+(df1+df0)/(r*r);
+    c2 = 3.0*(f1-f0)/(r*r)-(df1+2.0*df0)/(r);
+    c3 = df0;
+
+    // f(x) = c1 x^3 + c2 x^2 + c3 x^1 + c4
+    // has minimum at alpha below. We do not check boundaries.
+
+    alpha = (-c2 + sqrt(c2*c2 - 3.0*c1*c3))/(3.0*c1);
+    MPI_Bcast(&alpha,1,MPI_DOUBLE,0,world);
+
+    if (alpha < 0.0) alpha = r/2.0;
+
+    for (int i = 0; i < nlocal; i++) {
+      for (int j = 0; j < 3; j++) sp[i][j] = sp_copy[i][j];
+    }
+    calc_and_make_step(0.0, alpha, index);
+   }
+
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+  Approximate descent
+------------------------------------------------------------------------- */
+
+int MinSpinCG::adescent(double phi_0, double phi_j){
+
+  double eps = 1.0e-6;
+
+  if (phi_j<=phi_0+eps*fabs(phi_0))
+    return 1;
+  else
+    return 0;
+}
+
+/* ----------------------------------------------------------------------
+   evaluate max timestep
+---------------------------------------------------------------------- */
+
+double MinSpinCG::evaluate_dt()
+{
+  double dtmax;
+  double fmsq;
+  double fmaxsqone,fmaxsqloc,fmaxsqall;
+  int nlocal = atom->nlocal;
+  double **fm = atom->fm;
+
+  // finding max fm on this proc.
+
+  fmsq = fmaxsqone = fmaxsqloc = fmaxsqall = 0.0;
+  for (int i = 0; i < nlocal; i++) {
+    fmsq = fm[i][0]*fm[i][0]+fm[i][1]*fm[i][1]+fm[i][2]*fm[i][2];
+    fmaxsqone = MAX(fmaxsqone,fmsq);
+  }
+
+  // finding max fm on this replica
+
+  fmaxsqloc = fmaxsqone;
+  MPI_Allreduce(&fmaxsqone,&fmaxsqloc,1,MPI_DOUBLE,MPI_MAX,world);
+
+  // finding max fm over all replicas, if necessary
+  // this communicator would be invalid for multiprocess replicas
+
+  fmaxsqall = fmaxsqloc;
+  if (update->multireplica == 1) {
+    fmaxsqall = fmaxsqloc;
+    MPI_Allreduce(&fmaxsqloc,&fmaxsqall,1,MPI_DOUBLE,MPI_MAX,universe->uworld);
+  }
+
+  if (fmaxsqall == 0.0)
+    error->all(FLERR,"Incorrect fmaxsqall calculation");
+
+  // define max timestep by dividing by the
+  // inverse of max frequency by discrete_factor
+
+  dtmax = MY_2PI/(discrete_factor*sqrt(fmaxsqall));
+
+  return dtmax;
+}
--- a/src/SPIN/min_spin_cg.h
+++ b/src/SPIN/min_spin_cg.h
@ -0,0 +1,71 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef MINIMIZE_CLASS
+
+MinimizeStyle(spin/cg, MinSpinCG)
+
+#else
+
+#ifndef LMP_MIN_SPIN_CG_H
+#define LMP_MIN_SPIN_CG_H
+
+#include "min.h"
+
+namespace LAMMPS_NS {
+
+class MinSpinCG: public Min {
+ public:
+  MinSpinCG(class LAMMPS *);
+  virtual ~MinSpinCG();
+  void init();
+  void setup_style();
+  void reset_vectors();
+  int modify_param(int, char **);
+  int iterate(int);
+
+ private:
+  int local_iter;		// for neb
+  int nlocal_max;		// max value of nlocal (for size of lists)
+  int use_line_search;		// use line search or not.
+  int ireplica,nreplica;	// for neb
+  double dt;			// global timestep
+  double dts;			// spin timestep
+  double discrete_factor;	// factor for spin timestep evaluation
+  double der_e_cur;		// current derivative along search dir.
+  double der_e_pr;		// previous derivative along search dir.
+  double *spvec;		// variables for atomic dof, as 1d vector
+  double *fmvec;		// variables for atomic dof, as 1d vector
+  double *g_old;  		// gradient vector at previous step
+  double *g_cur;  		// current gradient vector
+  double *p_s;  		// search direction vector
+  double **sp_copy;		// copy of the spins
+
+  void advance_spins();
+  void calc_gradient();
+  void calc_search_direction();
+  void vm3(const double *, const double *, double *);
+  void rodrigues_rotation(const double *, double *);
+  void make_step(double, double *);
+  int calc_and_make_step(double, double, int);
+  int adescent(double, double);
+  double evaluate_dt();
+  double maximum_rotation(double *);
+
+  bigint last_negative;
+};
+
+}
+
+#endif
+#endif
--- a/src/SPIN/min_spin_lbfgs.cpp
+++ b/src/SPIN/min_spin_lbfgs.cpp
@ -0,0 +1,754 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ------------------------------------------------------------------------
+   Contributing authors: Aleksei Ivanov (University of Iceland)
+                         Julien Tranchida (SNL)
+
+   Please cite the related publication:
+   Ivanov, A. V., Uzdin, V. M., & Jónsson, H. (2019). Fast and Robust 
+   Algorithm for the Minimisation of the Energy of Spin Systems. arXiv 
+   preprint arXiv:1904.02669.
+------------------------------------------------------------------------- */
+
+#include <mpi.h>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include "min_spin_lbfgs.h"
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "output.h"
+#include "timer.h"
+#include "error.h"
+#include "memory.h"
+#include "modify.h"
+#include "math_special.h"
+#include "math_const.h"
+#include "universe.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+static const char cite_minstyle_spin_lbfgs[] =
+  "min_style spin/lbfgs command:\n\n"
+  "@article{ivanov2019fast,\n"
+  "title={Fast and Robust Algorithm for the Minimisation of the Energy of "
+  "Spin Systems},\n"
+  "author={Ivanov, A. V and Uzdin, V. M. and J{\'o}nsson, H.},\n"
+  "journal={arXiv preprint arXiv:1904.02669},\n"
+  "year={2019}\n"
+  "}\n\n";
+
+// EPS_ENERGY = minimum normalization for energy tolerance
+
+#define EPS_ENERGY 1.0e-8
+
+#define DELAYSTEP 5
+
+/* ---------------------------------------------------------------------- */
+
+MinSpinLBFGS::MinSpinLBFGS(LAMMPS *lmp) :
+  Min(lmp), g_old(NULL), g_cur(NULL), p_s(NULL), rho(NULL), ds(NULL), dy(NULL), sp_copy(NULL)
+{
+  if (lmp->citeme) lmp->citeme->add(cite_minstyle_spin_lbfgs);
+  nlocal_max = 0;
+
+  // nreplica = number of partitions
+  // ireplica = which world I am in universe
+
+  nreplica = universe->nworlds;
+  ireplica = universe->iworld;
+  use_line_search = 0;  // no line search as default option for LBFGS
+
+  maxepsrot = MY_2PI / (100.0);
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+MinSpinLBFGS::~MinSpinLBFGS()
+{
+    memory->destroy(g_old);
+    memory->destroy(g_cur);
+    memory->destroy(p_s);
+    memory->destroy(ds);
+    memory->destroy(dy);
+    memory->destroy(rho);
+    if (use_line_search)
+      memory->destroy(sp_copy);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinSpinLBFGS::init()
+{
+  num_mem = 3;
+  local_iter = 0;
+  der_e_cur = 0.0;
+  der_e_pr = 0.0;
+
+  Min::init();
+
+  // warning if line_search combined to gneb
+
+  if ((nreplica >= 1) && (linestyle != 4) && (comm->me == 0))
+    error->warning(FLERR,"Line search incompatible gneb");
+
+  // set back use_line_search to 0 if more than one replica
+
+  if (linestyle == 3 && nreplica == 1){
+    use_line_search = 1;
+  }
+  else{
+    use_line_search = 0;
+  }
+
+  last_negative = update->ntimestep;
+
+  // allocate tables
+
+  nlocal_max = atom->nlocal;
+  memory->grow(g_old,3*nlocal_max,"min/spin/lbfgs:g_old");
+  memory->grow(g_cur,3*nlocal_max,"min/spin/lbfgs:g_cur");
+  memory->grow(p_s,3*nlocal_max,"min/spin/lbfgs:p_s");
+  memory->grow(rho,num_mem,"min/spin/lbfgs:rho");
+  memory->grow(ds,num_mem,3*nlocal_max,"min/spin/lbfgs:ds");
+  memory->grow(dy,num_mem,3*nlocal_max,"min/spin/lbfgs:dy");
+  if (use_line_search)
+    memory->grow(sp_copy,nlocal_max,3,"min/spin/lbfgs:sp_copy");
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinSpinLBFGS::setup_style()
+{
+  double **v = atom->v;
+  int nlocal = atom->nlocal;
+
+  // check if the atom/spin style is defined
+
+  if (!atom->sp_flag)
+    error->all(FLERR,"min spin/lbfgs requires atom/spin style");
+
+  for (int i = 0; i < nlocal; i++)
+    v[i][0] = v[i][1] = v[i][2] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int MinSpinLBFGS::modify_param(int narg, char **arg)
+{
+  if (strcmp(arg[0],"discrete_factor") == 0) {
+    if (narg < 2) error->all(FLERR,"Illegal min_modify command");
+    double discrete_factor;
+    discrete_factor = force->numeric(FLERR,arg[1]);
+    maxepsrot = MY_2PI / (10 * discrete_factor);
+    return 2;
+  }
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+   set current vector lengths and pointers
+   called after atoms have migrated
+------------------------------------------------------------------------- */
+
+void MinSpinLBFGS::reset_vectors()
+{
+  // atomic dof
+
+  // size sp is 4N vector
+  nvec = 4 * atom->nlocal;
+  if (nvec) spvec = atom->sp[0];
+
+  nvec = 3 * atom->nlocal;
+  if (nvec) fmvec = atom->fm[0];
+
+  if (nvec) xvec = atom->x[0];
+  if (nvec) fvec = atom->f[0];
+}
+
+/* ----------------------------------------------------------------------
+   minimization via damped spin dynamics
+------------------------------------------------------------------------- */
+
+int MinSpinLBFGS::iterate(int maxiter)
+{
+  int nlocal = atom->nlocal;
+  bigint ntimestep;
+  double fmdotfm,fmsq;
+  int flag, flagall;
+  double **sp = atom->sp;
+  double der_e_cur_tmp = 0.0;
+
+  if (nlocal_max < nlocal) {
+    nlocal_max = nlocal;
+    local_iter = 0;
+    memory->grow(g_old,3*nlocal_max,"min/spin/lbfgs:g_old");
+    memory->grow(g_cur,3*nlocal_max,"min/spin/lbfgs:g_cur");
+    memory->grow(p_s,3*nlocal_max,"min/spin/lbfgs:p_s");
+    memory->grow(rho,num_mem,"min/spin/lbfgs:rho");
+    memory->grow(ds,num_mem,3*nlocal_max,"min/spin/lbfgs:ds");
+    memory->grow(dy,num_mem,3*nlocal_max,"min/spin/lbfgs:dy");
+    if (use_line_search)
+      memory->grow(sp_copy,nlocal_max,3,"min/spin/lbfgs:sp_copy");
+  }
+
+  for (int iter = 0; iter < maxiter; iter++) {
+
+    if (timer->check_timeout(niter))
+      return TIMEOUT;
+  
+    ntimestep = ++update->ntimestep;
+    niter++;
+  
+    // optimize timestep accross processes / replicas
+    // need a force calculation for timestep optimization
+
+    if (use_line_search) {
+
+      // here we need to do line search
+      if (local_iter == 0){
+        eprevious = ecurrent;
+        ecurrent = energy_force(0);
+        calc_gradient();
+      }
+
+      calc_search_direction();
+      der_e_cur = 0.0;
+      for (int i = 0; i < 3 * nlocal; i++)
+        der_e_cur += g_cur[i] * p_s[i];
+      MPI_Allreduce(&der_e_cur,&der_e_cur_tmp,1,MPI_DOUBLE,MPI_SUM,world);
+      der_e_cur = der_e_cur_tmp;
+      if (update->multireplica == 1) {
+        MPI_Allreduce(&der_e_cur_tmp,&der_e_cur,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+      }
+      for (int i = 0; i < nlocal; i++)
+        for (int j = 0; j < 3; j++)
+      sp_copy[i][j] = sp[i][j];
+
+      eprevious = ecurrent;
+      der_e_pr = der_e_cur;
+      calc_and_make_step(0.0, 1.0, 0);
+    }
+    else{
+
+      // here we don't do line search
+      // but use cutoff rotation angle
+      // if gneb calc., nreplica > 1
+      // then calculate gradients and advance spins
+      // of intermediate replicas only
+      eprevious = ecurrent;
+      ecurrent = energy_force(0);
+      calc_gradient();
+      calc_search_direction();
+      advance_spins();
+      neval++;
+    }
+
+    // energy tolerance criterion
+    // only check after DELAYSTEP elapsed since velocties reset to 0
+    // sync across replicas if running multi-replica minimization
+  
+    if (update->etol > 0.0 && ntimestep-last_negative > DELAYSTEP) {
+      if (update->multireplica == 0) {
+        if (fabs(ecurrent-eprevious) <
+            update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
+          return ETOL;
+      } else {
+        if (fabs(ecurrent-eprevious) <
+            update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
+          flag = 0;
+        else flag = 1;
+        MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,universe->uworld);
+        if (flagall == 0) return ETOL;
+      }
+    }
+
+    // magnetic torque tolerance criterion
+    // sync across replicas if running multi-replica minimization
+
+    fmdotfm = fmsq = 0.0;
+    if (update->ftol > 0.0) {
+      if (normstyle == MAX) fmsq = max_torque();	// max torque norm
+      else if (normstyle == INF) fmsq = inf_torque();	// inf torque norm
+      else if (normstyle == TWO) fmsq = total_torque();	// Euclidean torque 2-norm
+      else error->all(FLERR,"Illegal min_modify command");
+      fmdotfm = fmsq*fmsq;
+      if (update->multireplica == 0) {
+        if (fmdotfm < update->ftol*update->ftol) return FTOL;
+      } else {
+        if (fmdotfm < update->ftol*update->ftol) flag = 0;
+        else flag = 1;
+        MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,universe->uworld);
+        if (flagall == 0) return FTOL;
+      }
+    }
+
+    // output for thermo, dump, restart files
+
+    if (output->next == ntimestep) {
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(Timer::OUTPUT);
+    }
+  }
+
+  return MAXITER;
+}
+
+/* ----------------------------------------------------------------------
+   calculate gradients
+---------------------------------------------------------------------- */
+
+void MinSpinLBFGS::calc_gradient()
+{
+  int nlocal = atom->nlocal;
+  double **sp = atom->sp;
+  double **fm = atom->fm;
+  double hbar = force->hplanck/MY_2PI;
+
+  // loop on all spins on proc.
+
+  for (int i = 0; i < nlocal; i++) {
+    g_cur[3 * i + 0] = (fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0]) * hbar;
+    g_cur[3 * i + 1] = -(fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2]) * hbar;
+    g_cur[3 * i + 2] = (fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1]) * hbar;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   search direction:
+   Limited-memory BFGS.
+   See Jorge Nocedal and Stephen J. Wright 'Numerical
+   Optimization' Second Edition, 2006 (p. 177)
+---------------------------------------------------------------------- */
+
+void MinSpinLBFGS::calc_search_direction()
+{
+  int nlocal = atom->nlocal;
+
+  double dyds = 0.0;
+  double sq = 0.0;
+  double yy = 0.0;
+  double yr = 0.0;
+  double beta = 0.0;
+
+  double dyds_global = 0.0;
+  double sq_global = 0.0;
+  double yy_global = 0.0;
+  double yr_global = 0.0;
+
+  int m_index = local_iter % num_mem; // memory index
+  int c_ind = 0;
+  double *q;
+  double *alpha;
+
+  double factor;
+  double scaling = 1.0;
+
+  // for multiple replica do not move end points
+  if (nreplica > 1) {
+    if (ireplica == 0 || ireplica == nreplica - 1) {
+      factor = 0.0;
+    }
+    else factor = 1.0;
+  }else{
+    factor = 1.0;
+  }
+
+  if (local_iter == 0){ 	// steepest descent direction
+
+    //if no line search then calculate maximum rotation
+    if (use_line_search == 0)
+      scaling = maximum_rotation(g_cur);
+
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = -g_cur[i] * factor * scaling;;
+      g_old[i] = g_cur[i]  * factor;
+      for (int k = 0; k < num_mem; k++){
+        ds[k][i] = 0.0;
+        dy[k][i] = 0.0;
+      }
+    }
+    for (int k = 0; k < num_mem; k++)
+      rho[k] = 0.0;
+
+    } else {
+    dyds = 0.0;
+    for (int i = 0; i < 3 * nlocal; i++) {
+      ds[m_index][i] = p_s[i];
+      dy[m_index][i] = g_cur[i] - g_old[i];
+      dyds += ds[m_index][i] * dy[m_index][i];
+    }
+    MPI_Allreduce(&dyds, &dyds_global, 1, MPI_DOUBLE, MPI_SUM, world);
+
+    if (nreplica > 1) {
+      dyds_global *= factor;
+      dyds = dyds_global;
+      MPI_Allreduce(&dyds, &dyds_global, 1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+    }
+
+    if (fabs(dyds_global) > 1.0e-60) rho[m_index] = 1.0 / dyds_global;
+    else rho[m_index] = 1.0e60;
+
+    if (rho[m_index] < 0.0){
+      local_iter = 0;
+      return calc_search_direction();
+    }
+    q = (double *) calloc(3*nlocal, sizeof(double));
+    alpha = (double *) calloc(num_mem, sizeof(double));
+    // set the q vector
+
+    for (int i = 0; i < 3 * nlocal; i++) {
+      q[i] = g_cur[i];
+    }
+
+    // loop over last m indecies
+    for(int k = num_mem - 1; k > -1; k--) {
+      // this loop should run from the newest memory to the oldest one.
+
+      c_ind = (k + m_index + 1) % num_mem;
+
+      // dot product between dg and q
+
+      sq = 0.0;
+      for (int i = 0; i < 3 * nlocal; i++) {
+        sq += ds[c_ind][i] * q[i];
+      }
+      MPI_Allreduce(&sq,&sq_global,1,MPI_DOUBLE,MPI_SUM,world);
+      if (nreplica > 1) {
+        sq_global *= factor;
+        sq = sq_global;
+        MPI_Allreduce(&sq,&sq_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+      }
+
+      // update alpha
+
+      alpha[c_ind] = rho[c_ind] * sq_global;
+
+      // update q
+
+      for (int i = 0; i < 3 * nlocal; i++) {
+        q[i] -= alpha[c_ind] * dy[c_ind][i];
+      }
+    }
+
+    // dot product between dg with itself
+    yy = 0.0;
+    for (int i = 0; i < 3 * nlocal; i++) {
+      yy += dy[m_index][i] * dy[m_index][i];
+    }
+    MPI_Allreduce(&yy,&yy_global,1,MPI_DOUBLE,MPI_SUM,world);
+    if (nreplica > 1) {
+      yy_global *= factor;
+      yy = yy_global;
+      MPI_Allreduce(&yy,&yy_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+    }
+
+    // calculate now search direction
+
+    double devis = rho[m_index] * yy_global;
+
+    if (fabs(devis) > 1.0e-60) {
+      for (int i = 0; i < 3 * nlocal; i++) {
+        p_s[i] = factor * q[i] / devis;
+      }
+    }else{
+      for (int i = 0; i < 3 * nlocal; i++) {
+        p_s[i] = factor * q[i] * 1.0e60;
+      }
+    }
+
+    for (int k = 0; k < num_mem; k++){
+      // this loop should run from the oldest memory to the newest one.
+
+      if (local_iter < num_mem) c_ind = k;
+      else c_ind = (k + m_index + 1) % num_mem;
+
+      // dot product between p and da
+      yr = 0.0;
+      for (int i = 0; i < 3 * nlocal; i++) {
+        yr += dy[c_ind][i] * p_s[i];
+      }
+
+      MPI_Allreduce(&yr,&yr_global,1,MPI_DOUBLE,MPI_SUM,world);
+      if (nreplica > 1) {
+        yr_global *= factor;
+        yr = yr_global;
+        MPI_Allreduce(&yr,&yr_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+      }
+
+      beta = rho[c_ind] * yr_global;
+      for (int i = 0; i < 3 * nlocal; i++) {
+        p_s[i] += ds[c_ind][i] * (alpha[c_ind] - beta);
+      }
+    }
+    if (use_line_search == 0)
+      scaling = maximum_rotation(p_s);
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = - factor * p_s[i] * scaling;
+      g_old[i] = g_cur[i] * factor;
+    }
+    free(q);
+    free(alpha);
+  }
+  local_iter++;
+}
+
+/* ----------------------------------------------------------------------
+   rotation of spins along the search direction
+---------------------------------------------------------------------- */
+
+void MinSpinLBFGS::advance_spins()
+{
+  int nlocal = atom->nlocal;
+  double **sp = atom->sp;
+  double rot_mat[9]; // exponential of matrix made of search direction
+  double s_new[3];
+
+  // loop on all spins on proc.
+
+  for (int i = 0; i < nlocal; i++) {
+    rodrigues_rotation(p_s + 3 * i, rot_mat);
+    
+    // rotate spins
+    
+    vm3(rot_mat, sp[i], s_new);
+    for (int j = 0; j < 3; j++) sp[i][j] = s_new[j];
+  }
+}
+
+/* ----------------------------------------------------------------------
+  calculate 3x3 matrix exponential using Rodrigues' formula
+  (R. Murray, Z. Li, and S. Shankar Sastry,
+  A Mathematical Introduction to
+  Robotic Manipulation (1994), p. 28 and 30).
+  
+  upp_tr - vector x, y, z so that one calculate
+  U = exp(A) with A= [[0, x, y],
+                      [-x, 0, z],
+                      [-y, -z, 0]]
+------------------------------------------------------------------------- */
+
+void MinSpinLBFGS::rodrigues_rotation(const double *upp_tr, double *out)
+{
+  double theta,A,B,D,x,y,z;
+  double s1,s2,s3,a1,a2,a3;
+
+  if (fabs(upp_tr[0]) < 1.0e-40 &&
+      fabs(upp_tr[1]) < 1.0e-40 &&
+      fabs(upp_tr[2]) < 1.0e-40){
+
+    // if upp_tr is zero, return unity matrix
+    for(int k = 0; k < 3; k++){
+      for(int m = 0; m < 3; m++){
+    if (m == k) out[3 * k + m] = 1.0;
+    else out[3 * k + m] = 0.0;
+        }
+    }
+    return;
+  }
+
+  theta = sqrt(upp_tr[0] * upp_tr[0] +
+               upp_tr[1] * upp_tr[1] +
+               upp_tr[2] * upp_tr[2]);
+
+  A = cos(theta);
+  B = sin(theta);
+  D = 1 - A;
+  x = upp_tr[0]/theta;
+  y = upp_tr[1]/theta;
+  z = upp_tr[2]/theta;
+
+  // diagonal elements of U
+
+  out[0] = A + z * z * D;
+  out[4] = A + y * y * D;
+  out[8] = A + x * x * D;
+
+  // off diagonal of U
+
+  s1 = -y * z *D;
+  s2 = x * z * D;
+  s3 = -x * y * D;
+
+  a1 = x * B;
+  a2 = y * B;
+  a3 = z * B;
+
+  out[1] = s1 + a1;
+  out[3] = s1 - a1;
+  out[2] = s2 + a2;
+  out[6] = s2 - a2;
+  out[5] = s3 + a3;
+  out[7] = s3 - a3;
+
+}
+
+/* ----------------------------------------------------------------------
+  out = vector^T x m,
+  m -- 3x3 matrix , v -- 3-d vector
+------------------------------------------------------------------------- */
+
+void MinSpinLBFGS::vm3(const double *m, const double *v, double *out)
+{
+  for(int i = 0; i < 3; i++){
+    out[i] = 0.0;
+    for(int j = 0; j < 3; j++)
+    out[i] += *(m + 3 * j + i) * v[j];
+  }
+}
+
+
+void MinSpinLBFGS::make_step(double c, double *energy_and_der)
+{
+  double p_scaled[3];
+  int nlocal = atom->nlocal;
+  double rot_mat[9]; // exponential of matrix made of search direction
+  double s_new[3];
+  double **sp = atom->sp;
+  double der_e_cur_tmp = 0.0;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // scale the search direction
+
+    for (int j = 0; j < 3; j++) p_scaled[j] = c * p_s[3 * i + j];
+
+    // calculate rotation matrix
+
+    rodrigues_rotation(p_scaled, rot_mat);
+
+    // rotate spins
+
+    vm3(rot_mat, sp[i], s_new);
+    for (int j = 0; j < 3; j++) sp[i][j] = s_new[j];
+  }
+
+  ecurrent = energy_force(0);
+  calc_gradient();
+  neval++;
+  der_e_cur = 0.0;
+  for (int i = 0; i < 3 * nlocal; i++) {
+    der_e_cur += g_cur[i] * p_s[i];
+  }
+  MPI_Allreduce(&der_e_cur,&der_e_cur_tmp, 1, MPI_DOUBLE, MPI_SUM, world);
+  der_e_cur = der_e_cur_tmp;
+  if (update->multireplica == 1) {
+    MPI_Allreduce(&der_e_cur_tmp,&der_e_cur,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+  }
+
+  energy_and_der[0] = ecurrent;
+  energy_and_der[1] = der_e_cur;
+}
+
+/* ----------------------------------------------------------------------
+  Calculate step length which satisfies approximate Wolfe conditions
+  using the cubic interpolation
+------------------------------------------------------------------------- */
+
+int MinSpinLBFGS::calc_and_make_step(double a, double b, int index)
+{
+  double e_and_d[2] = {0.0,0.0};
+  double alpha,c1,c2,c3;
+  double **sp = atom->sp;
+  int nlocal = atom->nlocal;
+
+  make_step(b,e_and_d);
+  ecurrent = e_and_d[0];
+  der_e_cur = e_and_d[1];
+  index++;
+
+  if (adescent(eprevious,e_and_d[0]) || index == 5){
+    MPI_Bcast(&b,1,MPI_DOUBLE,0,world);
+    for (int i = 0; i < 3 * nlocal; i++) {
+      p_s[i] = b * p_s[i];
+    }
+    return 1;
+  }
+  else{
+    double r,f0,f1,df0,df1;
+    r = b - a;
+    f0 = eprevious;
+    f1 = ecurrent;
+    df0 = der_e_pr;
+    df1 = der_e_cur;
+
+    c1 = -2.0*(f1-f0)/(r*r*r)+(df1+df0)/(r*r);
+    c2 = 3.0*(f1-f0)/(r*r)-(df1+2.0*df0)/(r);
+    c3 = df0;
+
+    // f(x) = c1 x^3 + c2 x^2 + c3 x^1 + c4
+    // has minimum at alpha below. We do not check boundaries.
+
+    alpha = (-c2 + sqrt(c2*c2 - 3.0*c1*c3))/(3.0*c1);
+    MPI_Bcast(&alpha,1,MPI_DOUBLE,0,world);
+
+    if (alpha < 0.0) alpha = r/2.0;
+
+    for (int i = 0; i < nlocal; i++) {
+      for (int j = 0; j < 3; j++) sp[i][j] = sp_copy[i][j];
+    }
+    calc_and_make_step(0.0, alpha, index);
+   }
+
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+  Approximate descent
+------------------------------------------------------------------------- */
+
+int MinSpinLBFGS::adescent(double phi_0, double phi_j){
+
+  double eps = 1.0e-6;
+
+  if (phi_j<=phi_0+eps*fabs(phi_0))
+    return 1;
+  else
+    return 0;
+}
+
+double MinSpinLBFGS::maximum_rotation(double *p)
+{
+  double norm2,norm2_global,scaling,alpha;
+  int nlocal = atom->nlocal;
+  int ntotal = 0;
+
+  norm2 = 0.0;
+  for (int i = 0; i < 3 * nlocal; i++) norm2 += p[i] * p[i];
+
+  MPI_Allreduce(&norm2,&norm2_global,1,MPI_DOUBLE,MPI_SUM,world);
+  if (nreplica > 1) {
+    norm2 = norm2_global;
+    MPI_Allreduce(&norm2,&norm2_global,1,MPI_DOUBLE,MPI_SUM,universe->uworld);
+  }
+  MPI_Allreduce(&nlocal,&ntotal,1,MPI_INT,MPI_SUM,world);
+  if (nreplica > 1) {
+    nlocal = ntotal;
+    MPI_Allreduce(&nlocal,&ntotal,1,MPI_INT,MPI_SUM,universe->uworld);
+  }
+
+  scaling = (maxepsrot * sqrt((double) ntotal / norm2_global));
+
+  if (scaling < 1.0) alpha = scaling;
+  else alpha = 1.0;
+
+  return alpha;
+}
--- a/src/SPIN/min_spin_lbfgs.h
+++ b/src/SPIN/min_spin_lbfgs.h
@ -0,0 +1,72 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef MINIMIZE_CLASS
+
+MinimizeStyle(spin/lbfgs, MinSpinLBFGS)
+
+#else
+
+#ifndef LMP_MIN_SPIN_LBFGS_H
+#define LMP_MIN_SPIN_LBFGS_H
+
+#include "min.h"
+
+namespace LAMMPS_NS {
+
+class MinSpinLBFGS: public Min {
+ public:
+  MinSpinLBFGS(class LAMMPS *);
+  virtual ~MinSpinLBFGS();
+  void init();
+  void setup_style();
+  int modify_param(int, char **);
+  void reset_vectors();
+  int iterate(int);
+
+ private:
+  int local_iter;     		// for neb
+  int use_line_search; 		// use line search or not.
+  int nlocal_max;		// max value of nlocal (for size of lists)
+  int ireplica,nreplica; 	// for neb
+  double der_e_cur;		// current derivative along search dir.
+  double der_e_pr;    		// previous derivative along search dir.
+  double maxepsrot;
+  double *spvec;		// variables for atomic dof, as 1d vector
+  double *fmvec;		// variables for atomic dof, as 1d vector
+  double *g_old;  		// gradient vector at previous step
+  double *g_cur;  		// current gradient vector
+  double *p_s;  		// search direction vector
+
+  void advance_spins();
+  void calc_gradient();
+  void calc_search_direction();
+  void vm3(const double *, const double *, double *);
+  void rodrigues_rotation(const double *, double *);
+  void make_step(double, double *);
+  int calc_and_make_step(double, double, int);
+  int adescent(double, double);
+  double maximum_rotation(double *);
+
+  double *rho;		// estimation of curvature
+  double **ds;		// change in rotation matrix between two iterations, da
+  double **dy;		// change in gradients between two iterations, dg
+  double **sp_copy;	// copy of the spins
+  int num_mem;		// number of stored steps
+  bigint last_negative;
+};
+
+}
+
+#endif
+#endif
--- a/src/SPIN/neb_spin.cpp
+++ b/src/SPIN/neb_spin.cpp
@ -43,6 +43,8 @@
 #include "timer.h"
 #include "memory.h"
 #include "error.h"
+#include "math_const.h"
+#include "utils.h"

 using namespace LAMMPS_NS;

@ -186,8 +188,8 @@ void NEBSpin::run()

  if (update->minimize->searchflag)
    error->all(FLERR,"NEBSpin requires damped dynamics minimizer");
-  if (strcmp(update->minimize_style,"spin") != 0)
-    error->all(FLERR,"NEBSpin requires spin minimizer");
+  if (!utils::strmatch(update->minimize_style,"^spin"))
+    error->all(FLERR,"NEBSpin requires a spin minimizer");

  // setup regular NEBSpin minimization

@ -243,6 +245,8 @@ void NEBSpin::run()
  timer->init();
  timer->barrier_start();

+  // if(ireplica != 0 && ireplica != nreplica -1)
+
  while (update->minimize->niter < n1steps) {
    update->minimize->run(nevery);
    print_status();
@ -639,7 +643,7 @@ int NEBSpin::initial_rotation(double *spi, double *sploc, double fraction)
  kcrossy = kz*spix - kx*spiz;
  kcrossz = kx*spiy - ky*spix;

-  kdots = kx*spix + ky*spiz + kz*spiz;
+  kdots = kx*spix + ky*spiy + kz*spiz;

  omega = acos(sidotsf);
  omega *= fraction;
--- a/src/SPIN/pair_spin_dipole_cut.cpp
+++ b/src/SPIN/pair_spin_dipole_cut.cpp
@ -323,7 +323,7 @@ void PairSpinDipoleCut::compute(int eflag, int vflag)
 void PairSpinDipoleCut::compute_single_pair(int ii, double fmi[3])
 {
  int j,jnum,itype,jtype,ntypes; 
-  int *ilist,*jlist,*numneigh,**firstneigh;  
+  int *jlist,*numneigh,**firstneigh;  
  double rsq,rinv,r2inv,r3inv,local_cut2;
  double xi[3],rij[3],eij[3],spi[4],spj[4];

--- a/src/SPIN/pair_spin_dipole_long.cpp
+++ b/src/SPIN/pair_spin_dipole_long.cpp
@ -354,10 +354,9 @@ void PairSpinDipoleLong::compute(int eflag, int vflag)

 void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3])
 {
-  //int i,j,jj,jnum,itype,jtype;  
  int j,jj,jnum,itype,jtype,ntypes; 
  int k,locflag;
-  int *ilist,*jlist,*numneigh,**firstneigh;  
+  int *jlist,*numneigh,**firstneigh;  
  double r,rinv,r2inv,rsq,grij,expm2,t,erfc;
  double local_cut2,pre1,pre2,pre3;
  double bij[4],xi[3],rij[3],eij[3],spi[4],spj[4];
@ -367,7 +366,6 @@ void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3])
  double **sp = atom->sp;       
  double **fm_long = atom->fm_long;

-  ilist = list->ilist;
  numneigh = list->numneigh;
  firstneigh = list->firstneigh;
  
@ -405,7 +403,6 @@ void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3])
    // computation of the exchange interaction
    // loop over neighbors of atom i
      
-    //i = ilist[ii];
    xi[0] = x[ii][0];
    xi[1] = x[ii][1];
    xi[2] = x[ii][2];
--- a/src/SPIN/pair_spin_dmi.cpp
+++ b/src/SPIN/pair_spin_dmi.cpp
@ -313,7 +313,7 @@ void PairSpinDmi::compute(int eflag, int vflag)

      if (eflag) {
        evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-        evdwl *= hbar;
+        evdwl *= 0.5*hbar;
      } else evdwl = 0.0;

      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
@ -427,9 +427,9 @@ void PairSpinDmi::compute_dmi(int i, int j, double eij[3], double fmi[3], double
  dmiy = eij[2]*v_dmx[itype][jtype] - eij[0]*v_dmz[itype][jtype];
  dmiz = eij[0]*v_dmy[itype][jtype] - eij[1]*v_dmx[itype][jtype];

-  fmi[0] -= (dmiy*spj[2] - dmiz*spj[1]);
-  fmi[1] -= (dmiz*spj[0] - dmix*spj[2]);
-  fmi[2] -= (dmix*spj[1] - dmiy*spj[0]);
+  fmi[0] -= 2.0*(dmiy*spj[2] - dmiz*spj[1]);
+  fmi[1] -= 2.0*(dmiz*spj[0] - dmix*spj[2]);
+  fmi[2] -= 2.0*(dmix*spj[1] - dmiy*spj[0]);
 }

 /* ----------------------------------------------------------------------
--- a/src/SPIN/pair_spin_exchange.cpp
+++ b/src/SPIN/pair_spin_exchange.cpp
@ -296,7 +296,7 @@ void PairSpinExchange::compute(int eflag, int vflag)

      if (eflag) {
        evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]);
-        evdwl *= hbar;
+        evdwl *= 0.5*hbar;
      } else evdwl = 0.0;

      if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
@ -405,9 +405,9 @@ void PairSpinExchange::compute_exchange(int i, int j, double rsq, double fmi[3],
  Jex *= (1.0-J2[itype][jtype]*ra);
  Jex *= exp(-ra);

-  fmi[0] += Jex*spj[0];
-  fmi[1] += Jex*spj[1];
-  fmi[2] += Jex*spj[2];
+  fmi[0] += 2.0*Jex*spj[0];
+  fmi[1] += 2.0*Jex*spj[1];
+  fmi[2] += 2.0*Jex*spj[2];
 }

 /* ----------------------------------------------------------------------
--- a/src/min.cpp
+++ b/src/min.cpp
@ -42,10 +42,12 @@
 #include "output.h"
 #include "thermo.h"
 #include "timer.h"
+#include "math_const.h"
 #include "memory.h"
 #include "error.h"

 using namespace LAMMPS_NS;
+using namespace MathConst;

 /* ---------------------------------------------------------------------- */

@ -54,6 +56,7 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp)
  dmax = 0.1;
  searchflag = 0;
  linestyle = 1;
+  normstyle = TWO;

  elist_global = elist_atom = NULL;
  vlist_global = vlist_atom = NULL;
@ -659,6 +662,15 @@ void Min::modify_params(int narg, char **arg)
      if (strcmp(arg[iarg+1],"backtrack") == 0) linestyle = 0;
      else if (strcmp(arg[iarg+1],"quadratic") == 0) linestyle = 1;
      else if (strcmp(arg[iarg+1],"forcezero") == 0) linestyle = 2;
+      else if (strcmp(arg[iarg+1],"spin_cubic") == 0) linestyle = 3;
+      else if (strcmp(arg[iarg+1],"spin_none") == 0) linestyle = 4;
+      else error->all(FLERR,"Illegal min_modify command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"norm") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal min_modify command");
+      if (strcmp(arg[iarg+1],"two") == 0) normstyle = TWO;
+      else if (strcmp(arg[iarg+1],"max") == 0) normstyle = MAX;
+      else if (strcmp(arg[iarg+1],"inf") == 0) normstyle = INF;
      else error->all(FLERR,"Illegal min_modify command");
      iarg += 2;
    } else {
@ -822,6 +834,137 @@ double Min::fnorm_inf()
  return norm_inf;
 }

+/* ----------------------------------------------------------------------
+   compute and return ||force||_max (inf norm per-vector)
+------------------------------------------------------------------------- */
+
+double Min::fnorm_max()
+{
+  int i,n;
+  double fdotf,*fatom;
+
+  double local_norm_max = 0.0;
+  for (i = 0; i < nvec; i+=3) {
+    fdotf = fvec[i]*fvec[i]+fvec[i+1]*fvec[i+1]+fvec[i+2]*fvec[i+2];
+    local_norm_max = MAX(fdotf,local_norm_max);
+  }
+  if (nextra_atom) {
+    for (int m = 0; m < nextra_atom; m++) {
+      fatom = fextra_atom[m];
+      n = extra_nlen[m];
+      for (i = 0; i < n; i+=3)
+        fdotf = fvec[i]*fvec[i]+fvec[i+1]*fvec[i+1]+fvec[i+2]*fvec[i+2];
+        local_norm_max = MAX(fdotf,local_norm_max);
+    }
+  }
+
+  double norm_max = 0.0;
+  MPI_Allreduce(&local_norm_max,&norm_max,1,MPI_DOUBLE,MPI_MAX,world);
+
+  if (nextra_global)
+    for (i = 0; i < n; i+=3)
+      fdotf = fvec[i]*fvec[i]+fvec[i+1]*fvec[i+1]+fvec[i+2]*fvec[i+2];
+      norm_max = MAX(fdotf,norm_max);
+
+  return norm_max;
+}
+
+/* ----------------------------------------------------------------------
+   compute and return  sum_i||mag. torque_i||_2 (in eV)
+------------------------------------------------------------------------- */
+
+double Min::total_torque()
+{
+  double fmsq,ftotsqone,ftotsqall;
+  int nlocal = atom->nlocal;
+  double hbar = force->hplanck/MY_2PI;
+  double tx,ty,tz;
+  double **sp = atom->sp;
+  double **fm = atom->fm;
+
+  fmsq = ftotsqone = ftotsqall = 0.0;
+  for (int i = 0; i < nlocal; i++) {
+    tx = fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1];
+    ty = fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2];
+    tz = fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0];
+    fmsq = tx*tx + ty*ty + tz*tz;
+    ftotsqone += fmsq;
+  }
+
+  // summing all fmsqtot on this replica
+
+  MPI_Allreduce(&ftotsqone,&ftotsqall,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // multiply it by hbar so that units are in eV
+  
+  return sqrt(ftotsqall) * hbar;
+}
+
+/* ----------------------------------------------------------------------
+   compute and return max_i ||mag. torque components|| (in eV)
+------------------------------------------------------------------------- */
+
+double Min::inf_torque()
+{
+  double fmsq,fmaxsqone,fmaxsqall;
+  int nlocal = atom->nlocal;
+  double hbar = force->hplanck/MY_2PI;
+  double tx,ty,tz;
+  double **sp = atom->sp;
+  double **fm = atom->fm;
+
+  fmsq = fmaxsqone = fmaxsqall = 0.0;
+  for (int i = 0; i < nlocal; i++) {
+    tx = fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1];
+    ty = fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2];
+    tz = fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0];
+    fmaxsqone = MAX(fmaxsqone,tx*tx);
+    fmaxsqone = MAX(fmaxsqone,ty*ty);
+    fmaxsqone = MAX(fmaxsqone,tz*tz);
+  }
+
+  // finding max fm on this replica
+
+  fmaxsqall = fmaxsqone;
+  MPI_Allreduce(&fmaxsqone,&fmaxsqall,1,MPI_DOUBLE,MPI_MAX,world);
+
+  // multiply it by hbar so that units are in eV
+
+  return sqrt(fmaxsqall) * hbar;
+}
+
+/* ----------------------------------------------------------------------
+   compute and return max_i ||mag. torque_i|| (in eV)
+------------------------------------------------------------------------- */
+
+double Min::max_torque()
+{
+  double fmsq,fmaxsqone,fmaxsqall;
+  int nlocal = atom->nlocal;
+  double hbar = force->hplanck/MY_2PI;
+  double tx,ty,tz;
+  double **sp = atom->sp;
+  double **fm = atom->fm;
+
+  fmsq = fmaxsqone = fmaxsqall = 0.0;
+  for (int i = 0; i < nlocal; i++) {
+    tx = fm[i][1]*sp[i][2] - fm[i][2]*sp[i][1];
+    ty = fm[i][2]*sp[i][0] - fm[i][0]*sp[i][2];
+    tz = fm[i][0]*sp[i][1] - fm[i][1]*sp[i][0];
+    fmsq = tx*tx + ty*ty + tz*tz;
+    fmaxsqone = MAX(fmaxsqone,fmsq);
+  }
+
+  // finding max fm on this replica
+
+  fmaxsqall = fmaxsqone;
+  MPI_Allreduce(&fmaxsqone,&fmaxsqall,1,MPI_DOUBLE,MPI_MAX,world);
+
+  // multiply it by hbar so that units are in eV
+
+  return sqrt(fmaxsqall) * hbar;
+}
+
 /* ----------------------------------------------------------------------
   possible stop conditions
 ------------------------------------------------------------------------- */
--- a/src/min.h
+++ b/src/min.h
@ -39,8 +39,16 @@ class Min : protected Pointers {
  virtual bigint memory_usage() {return 0;}
  void modify_params(int, char **);
  virtual int modify_param(int, char **) {return 0;}
-  virtual double fnorm_sqr();
-  virtual double fnorm_inf();
+  double fnorm_sqr();
+  double fnorm_inf();
+  double fnorm_max();
+
+  enum{TWO,MAX,INF};
+
+  // methods for spin minimizers
+  double total_torque();
+  double inf_torque(); 
+  double max_torque(); 

  virtual void init_style() {}
  virtual void setup_style() = 0;
@ -56,8 +64,11 @@ class Min : protected Pointers {
  int virial_style;           // compute virial explicitly or implicitly
  int external_force_clear;   // clear forces locally or externally

-  double dmax;                // max dist to move any atom in one step
-  int linestyle;              // 0 = backtrack, 1 = quadratic, 2 = forcezero
+  double dmax;			// max dist to move any atom in one step
+  int linestyle;		// 0 = backtrack, 1 = quadratic, 2 = forcezero 
+  				// 3 = spin_cubic, 4 = spin_none
+
+  int normstyle;		// TWO, MAX or INF flag for force norm evaluation

  int nelist_global,nelist_atom;    // # of PE,virial computes to check
  int nvlist_global,nvlist_atom;
@ -104,9 +115,6 @@ class Min : protected Pointers {
  virtual double energy_force(int);
  virtual void force_clear();

-  double compute_force_norm_sqr();
-  double compute_force_norm_inf();
-
  void ev_setup();
  void ev_set(bigint);

--- a/src/min_cg.cpp
+++ b/src/min_cg.cpp
@ -14,6 +14,7 @@
 #include "min_cg.h"
 #include <mpi.h>
 #include <cmath>
+#include "error.h"
 #include "update.h"
 #include "output.h"
 #include "timer.h"
@ -35,7 +36,7 @@ MinCG::MinCG(LAMMPS *lmp) : MinLineSearch(lmp) {}
 int MinCG::iterate(int maxiter)
 {
  int i,m,n,fail,ntimestep;
-  double beta,gg,dot[2],dotall[2];
+  double beta,gg,dot[2],dotall[2],fmax;
  double *fatom,*gatom,*hatom;

  // nlimit = max # of CG iterations before restarting
@ -90,6 +91,7 @@ int MinCG::iterate(int maxiter)
      dot[0] += fvec[i]*fvec[i];
      dot[1] += fvec[i]*g[i];
    }
+    
    if (nextra_atom)
      for (m = 0; m < nextra_atom; m++) {
        fatom = fextra_atom[m];
@ -98,6 +100,7 @@ int MinCG::iterate(int maxiter)
        for (i = 0; i < n; i++) {
          dot[0] += fatom[i]*fatom[i];
          dot[1] += fatom[i]*gatom[i];
+	  fmax = MAX(fmax,fatom[i]*fatom[i]);
        }
      }
    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
@ -107,7 +110,16 @@ int MinCG::iterate(int maxiter)
        dotall[1] += fextra[i]*gextra[i];
      }

-    if (dotall[0] < update->ftol*update->ftol) return FTOL;
+    fmax = 0.0;
+    if (normstyle == MAX) {		// max force norm
+      fmax = fnorm_max();
+      if (fmax < update->ftol*update->ftol) return FTOL;
+    } else if (normstyle == INF) {	// infinite force norm
+      fmax = fnorm_inf();
+      if (fmax < update->ftol*update->ftol) return FTOL;
+    } else if (normstyle == TWO) {	// Euclidean force 2-norm
+      if (dotall[0] < update->ftol*update->ftol) return FTOL;
+    } else error->all(FLERR,"Illegal min_modify command"); 

    // update new search direction h from new f = -Grad(x) and old g
    // this is Polak-Ribieri formulation
--- a/src/min_fire.cpp
+++ b/src/min_fire.cpp
@ -16,6 +16,7 @@
 #include <cmath>
 #include "universe.h"
 #include "atom.h"
+#include "error.h"
 #include "force.h"
 #include "update.h"
 #include "output.h"
@ -80,7 +81,7 @@ void MinFire::reset_vectors()
 int MinFire::iterate(int maxiter)
 {
  bigint ntimestep;
-  double vmax,vdotf,vdotfall,vdotv,vdotvall,fdotf,fdotfall;
+  double vmax,vdotf,vdotfall,vdotv,vdotvall,fdotf,fdotfloc,fdotfall;
  double scale1,scale2;
  double dtvone,dtv,dtf,dtfm;
  int flag,flagall;
@ -250,7 +251,10 @@ int MinFire::iterate(int maxiter)
    // sync across replicas if running multi-replica minimization

    if (update->ftol > 0.0) {
-      fdotf = fnorm_sqr();
+      if (normstyle == MAX) fdotf = fnorm_max();	// max force norm
+      else if (normstyle == INF) fdotf = fnorm_inf();	// inf force norm
+      else if (normstyle == TWO) fdotf = fnorm_sqr();	// Euclidean force 2-norm
+      else error->all(FLERR,"Illegal min_modify command");
      if (update->multireplica == 0) {
        if (fdotf < update->ftol*update->ftol) return FTOL;
      } else {
--- a/src/min_hftn.cpp
+++ b/src/min_hftn.cpp
@ -22,6 +22,7 @@
 #include <cmath>
 #include <cstring>
 #include "atom.h"
+#include "error.h"
 #include "fix_minimize.h"
 #include "modify.h"
 #include "output.h"
@ -112,6 +113,9 @@ void MinHFTN::init()
 {
  Min::init();

+  if (normstyle == MAX)
+    error->all(FLERR,"Incorrect min_modify option");
+
  for (int  i = 1; i < NUM_HFTN_ATOM_BASED_VECTORS; i++) {
    if (_daExtraGlobal[i] != NULL)
      delete [] _daExtraGlobal[i];
--- a/src/min_quickmin.cpp
+++ b/src/min_quickmin.cpp
@ -16,6 +16,7 @@
 #include <cmath>
 #include "universe.h"
 #include "atom.h"
+#include "error.h"
 #include "force.h"
 #include "update.h"
 #include "output.h"
@ -75,7 +76,7 @@ void MinQuickMin::reset_vectors()
 int MinQuickMin::iterate(int maxiter)
 {
  bigint ntimestep;
-  double vmax,vdotf,vdotfall,fdotf,fdotfall,scale;
+  double vmax,vdotf,vdotfall,fdotf,fdotfloc,fdotfall,scale;
  double dtvone,dtv,dtf,dtfm;
  int flag,flagall;

@ -215,7 +216,10 @@ int MinQuickMin::iterate(int maxiter)
    // sync across replicas if running multi-replica minimization

    if (update->ftol > 0.0) {
-      fdotf = fnorm_sqr();
+      if (normstyle == MAX) fdotfloc = fnorm_max();		// max force norm
+      else if (normstyle == INF) fdotfloc = fnorm_inf();	// inf force norm
+      else if (normstyle == TWO) fdotfloc = fnorm_sqr();	// Euclidean force 2-norm
+      else error->all(FLERR,"Illegal min_modify command");
      if (update->multireplica == 0) {
        if (fdotf < update->ftol*update->ftol) return FTOL;
      } else {
--- a/src/min_sd.cpp
+++ b/src/min_sd.cpp
@ -13,6 +13,7 @@

 #include "min_sd.h"
 #include <cmath>
+#include "error.h"
 #include "update.h"
 #include "output.h"
 #include "timer.h"
@ -34,7 +35,7 @@ MinSD::MinSD(LAMMPS *lmp) : MinLineSearch(lmp) {}
 int MinSD::iterate(int maxiter)
 {
  int i,m,n,fail,ntimestep;
-  double fdotf;
+  double fdotf,fdotfloc;
  double *fatom,*hatom;

  // initialize working vectors
@ -77,7 +78,10 @@ int MinSD::iterate(int maxiter)

    // force tolerance criterion

-    fdotf = fnorm_sqr();
+    if (normstyle == MAX) fdotf = fnorm_max();		// max force norm
+    else if (normstyle == INF) fdotf = fnorm_inf();	// infinite force norm
+    else if (normstyle == TWO) fdotf = fnorm_sqr();	// Euclidean force 2-norm
+    else error->all(FLERR,"Illegal min_modify command");
    if (fdotf < update->ftol*update->ftol) return FTOL;

    // set new search direction h to f = -Grad(x)