Files
lammps/src/math_eigen_impl.h
Axel Kohlmeyer a8220a8502 cosmetic changes
2021-09-15 16:08:53 -04:00

1375 lines
47 KiB
C++

// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License. (Some of the code in this file is also
available using a more permissive license. See below for details.)
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Yuya Kurebayashi (Tohoku University, Lanczos algorithm)
Andrew Jewett (Scripps Research, Jacobi algorithm)
------------------------------------------------------------------------- */
#ifndef LMP_MATH_EIGEN_IMPL_H
#define LMP_MATH_EIGEN_IMPL_H
// This file contains a library of functions and classes which can
// efficiently perform eigendecomposition for an extremely broad
// range of matrix types: both real and complex, dense and sparse.
// Matrices need not be of type "double **", for example.
// In principle, almost any type of C++ container can be used.
// Some general C++11 compatible functions for allocating matrices and
// calculating norms of real and complex vectors are also provided.
// note
// The "Jacobi" and "PEigenDense" classes are used for calculating
// eigenvalues and eigenvectors of conventional dense square matrices.
// note
// The "LambdaLanczos" class can calculate eigenvalues and eigenvectors
// of more general types of matrices, especially large, sparse matrices.
// It uses C++ lambda expressions to simplify and generalize the way
// matrices can be represented. This allows it to be applied to
// nearly any kind of sparse (or dense) matrix representation.
// note
// The source code for Jacobi and LambdaLanczos is also available at:
// https://github.com/jewettaij/jacobi_pd (CC0-1.0 license)
// https://github.com/mrcdr/lambda-lanczos (MIT license)
//#include <cassert>
#include <numeric>
#include <complex>
#include <limits>
#include <cmath>
#include <vector>
#include <random>
#include <functional>
namespace MathEigen {
// --- Memory allocation for matrices ---
// Allocate an arbitrary 2-dimensional array. (Uses row-major order.)
// note This function was intended for relatively small matrices (eg 4x4).
// For large arrays, please use the 2d create() function from "memory.h"
template<typename Entry>
void Alloc2D(size_t nrows, // size of the array (number of rows)
size_t ncols, // size of the array (number of columns)
Entry ***paaX); // pointer to a 2D C-style array (rows allocated contiguously)
// Deallocate arrays that were created using Alloc2D().
// (Safe to call when *paaX is already nullptr; resets *paaX to nullptr.)
template<typename Entry>
void Dealloc2D(Entry ***paaX); // pointer to 2D multidimensional array
// --- Complex numbers ---
/// @brief
/// "realTypeMap" is the helper struct behind the "real_t<T>" type mapper.
/// "real_t<T>" yields the C++ type of the real component of T: for a real
/// type T it is T itself, and for std::complex<U> it is U. It is used by
/// the "LambdaLanczos" and "PEigenDense" classes, so it is documented here
/// to help users understand those classes.
///
/// @details
/// For example, suppose you have a matrix of type std::complex<double>**.
/// The eigenvalues calculated by "LambdaLanczos" and "PEigenDense" are then
/// of type "double" (which is the same as "real_t<std::complex<double>>"),
/// not "std::complex<double>". (This is because the algorithm assumes the
/// matrix is Hermitian, and the eigenvalues of a Hermitian matrix are always
/// real. So attempting to pass a reference to a complex number as the
/// first argument to LambdaLanczos::run() will not compile.)
///
/// Implementation details: the mapping is implemented with a C++ template
/// specialization for std::complex<T>.
template <typename T>
struct realTypeMap {
  using type = T;     // generic case: T is already a real type
};
template <typename T>
struct realTypeMap<std::complex<T>> {
  using type = T;     // complex case: strip the std::complex wrapper
};
template <typename T>
using real_t = typename realTypeMap<T>::type;
// --- Operations on vectors (of real and complex numbers) ---
// Calculate the inner product of two vectors.
// (For vectors of complex numbers, the entries of v1 are conjugated via
//  std::conj(), so this computes <v1|v2> in bra-ket notation.)
template <typename T>
T inner_prod(const std::vector<T>& v1, const std::vector<T>& v2);
// Compute the sum of the absolute values of the entries in v.
// Returns a real number (of type real_t<T>).
template <typename T>
real_t<T> l1_norm(const std::vector<T>& v);
// Calculate the l2_norm (Euclidean length) of vector v.
// Returns a real number (of type real_t<T>).
template <typename T>
real_t<T> l2_norm(const std::vector<T>& v);
/// Multiply a vector (v) in place by a scalar (c).
template <typename T1, typename T2>
void scalar_mul(T1 c, std::vector<T2>& v);
/// Divide vector "v" in-place by its length (l2_norm(v)).
template <typename T>
void normalize(std::vector<T>& v);
// ---- Eigendecomposition of small dense symmetric matrices ----
/// @class Jacobi
/// @brief Calculate the eigenvalues and eigenvectors of a symmetric matrix
/// using the Jacobi eigenvalue algorithm. This code (along with tests
/// and benchmarks) is available free of license restrictions at:
/// https://github.com/jewettaij/jacobi_pd
/// @note The "Vector", "Matrix", and "ConstMatrix" type arguments can be any
/// C or C++ object that supports indexing, including pointers or vectors.
template<typename Scalar,
typename Vector,
typename Matrix,
typename ConstMatrix=Matrix>
class Jacobi
{
int n; //!< the size of the matrices you want to diagonalize
Scalar **M; //!< local copy of the current matrix being analyzed
// Precomputed cosine, sine, and tangent of the most recent rotation angle:
Scalar c; //!< = cos(θ)
Scalar s; //!< = sin(θ)
Scalar t; //!< = tan(θ), (note |t|<=1)
int *max_idx_row; //!< = keep track of the maximum element in row i (at column >i)
public:
/// @brief Specify the size of the matrices you want to diagonalize later.
/// @param n the size (ie. number of rows) of the (square) matrix.
Jacobi(int n);
~Jacobi();
/// @brief Change the size of the matrices you want to diagonalize.
/// @param n the size (ie. number of rows) of the (square) matrix.
void SetSize(int n);
// @typedef choose the criteria for sorting eigenvalues and eigenvectors
typedef enum eSortCriteria {
DO_NOT_SORT,
SORT_DECREASING_EVALS,
SORT_INCREASING_EVALS,
SORT_DECREASING_ABS_EVALS,
SORT_INCREASING_ABS_EVALS
} SortCriteria;
/// @brief Calculate the eigenvalues and eigenvectors of a symmetric matrix
/// using the Jacobi eigenvalue algorithm.
/// @returns 0 if the algorithm converged,
/// 1 if the algorithm failed to converge. (IE, if the number of
/// pivot iterations exceeded max_num_sweeps * iters_per_sweep,
/// where iters_per_sweep = (n*(n-1)/2))
/// @note To reduce the computation time further, set calc_evecs=false.
int
Diagonalize(ConstMatrix mat, //!< the matrix you wish to diagonalize (size n)
Vector eval, //!< store the eigenvalues here
Matrix evec, //!< store the eigenvectors here (in rows)
SortCriteria sort_criteria=SORT_DECREASING_EVALS,//!<sort results?
bool calc_evecs=true, //!< calculate the eigenvectors?
int max_num_sweeps=50 //!< limit the number of iterations
);
// An alternative constructor is provided, if, for some reason,
// you want to avoid allocating memory on the heap during
// initialization. In that case, you can allocate space for the "M"
// and "max_idx_row" arrays on the stack in advance, and pass them
// to the constructor. (The vast majority of users probably
// do not need this feature. Unless you do, I encourage you
// to use the regular constructor instead.)
// n the size (ie. number of rows) of the (square) matrix.
// M optional preallocated n x n array
// max_idx_row optional preallocated array of size n
// note If either the "M" or "max_idx_row" arguments are specified,
// they both must be specified. The caller retains ownership of both.
Jacobi(int n, Scalar **M, int *max_idx_row);
private:
bool is_preallocated; // true if the caller owns "M" and "max_idx_row"
// (Descriptions of private functions can be found in their implementation.)
void _Jacobi(int n, Scalar **M, int *max_idx_row);
void CalcRot(Scalar const *const *M, int i, int j);
void ApplyRot(Scalar **M, int i, int j);
void ApplyRotLeft(Matrix E, int i, int j);
int MaxEntryRow(Scalar const *const *M, int i) const;
void MaxEntry(Scalar const *const *M, int& i_max, int& j_max) const;
void SortRows(Vector v, Matrix M, int n, SortCriteria s=SORT_DECREASING_EVALS) const;
void Init();
void Alloc(int n);
void Dealloc();
public:
// C++ boilerplate: copy and move constructor, swap, and assignment operator
Jacobi(const Jacobi<Scalar, Vector, Matrix, ConstMatrix>& source);
Jacobi(Jacobi<Scalar, Vector, Matrix, ConstMatrix>&& other);
void swap(Jacobi<Scalar, Vector, Matrix, ConstMatrix> &other);
Jacobi<Scalar, Vector, Matrix, ConstMatrix>& operator = (Jacobi<Scalar, Vector, Matrix, ConstMatrix> source);
}; // class Jacobi
// ---- Eigendecomposition of large sparse (or dense) matrices ----
// The "LambdaLanczos" is a class useful for calculating eigenvalues
// and eigenvectors of large sparse matrices. Unfortunately, before the
// LambdaLanczos class can be declared, several additional expressions,
// classes and functions that it depends on must be declared first.
// Create random vectors used at the beginning of the Lanczos algorithm.
// note "Partial specialization of a function" is not allowed in C++, so
// it is mimicked by wrapping the "init" function with a class template.
// (The real-valued and complex-valued cases need different implementations;
//  the definitions appear later in this file.)
template <typename T>
struct VectorRandomInitializer {
public:
static void init(std::vector<T>&);
};
template <typename T>
struct VectorRandomInitializer<std::complex<T>> {
public:
static void init(std::vector<std::complex<T>>&);
};
// Return the number of significant decimal digits representable by type T.
// (For example, 15 for double: 53 binary digits * log10(2) ~= 15.95.)
template <typename T>
inline constexpr int sig_decimal_digit() {
  return static_cast<int>(std::numeric_limits<T>::digits *
                          std::log10(static_cast<double>(std::numeric_limits<T>::radix)));
}
// Return 10^-n where n = number of significant decimal digits of type T.
// (Roughly the smallest relative difference distinguishable in type T.)
template <typename T>
inline constexpr T minimum_effective_decimal() {
  return static_cast<T>(std::pow(10.0, -sig_decimal_digit<T>()));
}
/// @class LambdaLanczos
/// @brief The LambdaLanczos class provides a general way to calculate
/// the smallest or largest eigenvalue and the corresponding eigenvector
/// of a Hermitian matrix using the Lanczos algorithm.
/// The characteristic feature of LambdaLanczos is that the matrix-vector
/// multiplication routine used in the Lanczos algorithm is adaptable.
/// This code (along with automatic unit tests) is also distributed
/// under the MIT license at https://github.com/mrcdr/lambda-lanczos
///
/// @note
/// If the matrices you want to analyze are ordinary square matrices, (as in
/// the example) it might be easier to use "PEigenDense" instead. (It is a
/// wrapper which takes care of all of the LambdaLanczos details for you.)
///
/// @note
/// IMPORTANT:
/// Unless the matrix you are solving is positive or negative definite, you
/// MUST set the "eigenvalue_offset" parameter. See the "pg_developer" docs.
template <typename T>
class LambdaLanczos {
public:
LambdaLanczos();
LambdaLanczos(std::function<void(const std::vector<T>&, std::vector<T>&)> mv_mul, int matrix_size, bool find_maximum);
LambdaLanczos(std::function<void(const std::vector<T>&, std::vector<T>&)> mv_mul, int matrix_size) : LambdaLanczos(mv_mul, matrix_size, true) {}
/// @brief Calculate the principal (largest or smallest) eigenvalue
/// of the matrix (and its corresponding eigenvector).
/// (Note that the eigenvalue is real even when T is complex; see real_t.)
int run(real_t<T>&, std::vector<T>&) const;
// --- public data members ---
/// @brief Specify the size of the matrix you will analyze.
/// (This equals the size of the eigenvector which will be returned.)
int matrix_size;
/// @brief Specify the function used for matrix*vector multiplication
/// used by the Lanczos algorithm. For an ordinary dense matrix,
/// this function is the ordinary matrix*vector product. (See the
/// example above. For a sparse matrix, it will be something else.)
std::function<void(const std::vector<T>&, std::vector<T>&)> mv_mul;
/// @brief Are we searching for the maximum or minimum eigenvalue?
/// @note (Usually, you must also specify eigenvalue_offset.)
bool find_maximum = false;
/// @brief Shift all the eigenvalues by "eigenvalue_offset" during the Lanczos
/// iteration (ie. during LambdaLanczos::run()). The goal is to ensure
/// that the correct eigenvalue is selected (the one with the maximum
/// magnitude).
/// @note Unless your matrix is positive definite or negative definite, you
/// MUST specify eigenvalue_offset. See comment above for details.
/// @note After LambdaLanczos::run() has finished, it will subtract this
/// number from the eigenvalue before it is returned to the caller.
real_t<T> eigenvalue_offset = 0.0;
/// @brief This function sets "eigenvalue_offset" automatically.
/// @note Using this function is not recommended because it is very slow.
/// For efficiency, set the "eigenvalue_offset" yourself.
void ChooseOffset();
// The remaining data members usually can be left alone:
int max_iteration; // maximum number of Lanczos iterations in run()
real_t<T> eps = minimum_effective_decimal<real_t<T>>() * 1e3; // convergence threshold
real_t<T> tridiag_eps_ratio = 1e-1;
int initial_vector_size = 200; // initial reserve() size for internal work vectors
std::function<void(std::vector<T>&)> init_vector =
VectorRandomInitializer<T>::init;
// (for those who prefer using "Set" functions...)
int SetSize(int matrix_size);
void SetMul(std::function<void(const std::vector<T>&,
std::vector<T>&)> mv_mul);
void SetInitVec(std::function<void(std::vector<T>&)> init_vector);
void SetFindMax(bool find_maximum);
// NOTE(review): the three setters below take parameters of type T, but the
// members they presumably set (eigenvalue_offset, eps, tridiag_eps_ratio)
// are of type real_t<T> -- for complex T this looks inconsistent; confirm.
void SetEvalOffset(T eigenvalue_offset);
void SetEpsilon(T eps);
void SetTriEpsRatio(T tridiag_eps_ratio);
private:
static void schmidt_orth(std::vector<T>&, const std::vector<std::vector<T>>&);
real_t<T> find_minimum_eigenvalue(const std::vector<real_t<T>>&,
const std::vector<real_t<T>>&) const;
real_t<T> find_maximum_eigenvalue(const std::vector<real_t<T>>&,
const std::vector<real_t<T>>&) const;
static real_t<T> tridiagonal_eigen_limit(const std::vector<real_t<T>>&,
const std::vector<real_t<T>>&);
static int num_of_eigs_smaller_than(real_t<T>,
const std::vector<real_t<T>>&,
const std::vector<real_t<T>>&);
real_t<T> UpperBoundEvals() const;
};
/// @class PEigenDense
/// @brief
/// PEigenDense is a class containing only one useful member function:
/// PrincipalEigen(). This function calculates the principal (largest
/// or smallest) eigenvalue and corresponding eigenvector of a square
/// n x n matrix. This can be faster than diagonalizing the entire matrix.
/// @note
/// This code is a wrapper. Internally, it uses the "LambdaLanczos" class.
/// @note
/// For dense matrices smaller than 13x13, Jacobi::Diagonalize(),
/// is usually faster than PEigenDense::PrincipalEigen().
template<typename Scalar, typename Vector, typename ConstMatrix>
class PEigenDense
{
size_t n; // the size of the matrix
std::vector<Scalar> evec; // preallocated workspace for the eigenvector
public:
PEigenDense(int matrix_size=0);
/// @brief Calculate the principal eigenvalue and eigenvector of a matrix.
/// @return Return the principal eigenvalue of the matrix.
/// If you want the eigenvector, pass a non-null "evector" argument.
real_t<Scalar>
PrincipalEigen(ConstMatrix matrix, //!< the input matrix
Vector evector, //!< the eigenvector is stored here
bool find_max=false); //!< want the max or min eigenvalue?
void SetSize(int matrix_size); //change matrix size after instantiation
}; // class PEigenDense
// --------------------------------------
// ----------- IMPLEMENTATION -----------
// --------------------------------------
// --- Implementation: Memory allocation for matrices ---
// Allocate an nrows x ncols 2D C-style array: an array of row pointers, all
// pointing into one contiguous block of entries (row-major order).
// The caller can access the contents using (*paaX)[i][j] and must release
// the memory with Dealloc2D().
template<typename Entry>
void Alloc2D(size_t nrows,  // size of the array (number of rows)
             size_t ncols,  // size of the array (number of columns)
             Entry ***paaX) // pointer to a 2D C-style array
{
  //assert(paaX);
  // Guard against zero-sized requests: the previous code dereferenced
  // element 0 of a zero-length row-pointer array (undefined behavior).
  if ((nrows == 0) || (ncols == 0)) {
    *paaX = nullptr;  // Dealloc2D() treats nullptr as a no-op
    return;
  }
  *paaX = new Entry* [nrows];             // the array of row pointers
  (*paaX)[0] = new Entry [nrows * ncols]; // one contiguous block of memory
  for (size_t iy=0; iy<nrows; iy++)
    (*paaX)[iy] = (*paaX)[0] + iy*ncols;  // point each row into the block
}
template<typename Entry>
void Dealloc2D(Entry ***paaX) // pointer to a 2D C-style array
{
  // Release the two allocations made by Alloc2D() (the contiguous block of
  // entries, then the array of row pointers) and reset the caller's pointer.
  // Does nothing if paaX is null or the array was already deallocated.
  if ((paaX == nullptr) || (*paaX == nullptr))
    return;
  delete [] (*paaX)[0]; // the contiguous block of entries
  delete [] *paaX;      // the array of row pointers
  *paaX = nullptr;
}
/// @brief ConjugateProduct::prod(x,y) returns x*y by default. If the
/// arguments are complex numbers, it returns conj(x)*y instead.
template <typename T>
struct ConjugateProduct {
public:
  // generic (real) case: plain multiplication, no conjugation needed
  static T prod(T x, T y) { return x*y; }
};
template <typename T>
struct ConjugateProduct<std::complex<T>> {
public:
  // complex case: conjugate the first factor
  static std::complex<T> prod(std::complex<T> x, std::complex<T> y) {
    return std::conj(x)*y;
  }
};
// --- Implementation: Operations on vectors (of real and complex numbers) ---
template <typename T>
inline T inner_prod(const std::vector<T>& v1, const std::vector<T>& v2) {
  // Compute sum_i conj(v1[i])*v2[i], ie. <v1|v2> in bra-ket notation.
  // std::inner_product alone would compute v1*v2 without conjugation, so
  // ConjugateProduct::prod is supplied as the element-wise product, and
  // T() (the zero value of type T) as the initial value of the sum.
  return std::inner_product(std::begin(v1), std::end(v1),
                            std::begin(v2), T(),
                            [](T lhs, T rhs) -> T { return lhs+rhs; },
                            ConjugateProduct<T>::prod);
}
template <typename T>
inline real_t<T> l2_norm(const std::vector<T>& vec) {
  // <v|v> is real and non-negative for any (real or complex) vector v,
  // so std::real() discards only a zero imaginary part.
  const real_t<T> norm_squared = std::real(inner_prod(vec, vec));
  return std::sqrt(norm_squared);
}
/// Multiply every entry of vector "vec" in place by the scalar "a".
template <typename T1, typename T2>
inline void scalar_mul(T1 a, std::vector<T2>& vec) {
  // (A range-for avoids the previous signed-int loop over a size_t range.)
  for (T2 &x : vec)
    x *= a;
}
template <typename T>
inline void normalize(std::vector<T>& vec) {
scalar_mul(1.0/l2_norm(vec), vec);
}
template <typename T>
inline real_t<T> l1_norm(const std::vector<T>& vec) {
  // Sum |v_i| over all entries (std::abs handles real and complex entries).
  real_t<T> sum = real_t<T>();  // zero-initialized accumulator
  for (size_t i = 0; i < vec.size(); i++)
    sum += std::abs(vec[i]);
  return sum;
}
// --- Implementation: Eigendecomposition of small dense matrices ---
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Jacobi(int n) {
// Ordinary constructor: allocates the internal workspace on the heap
// (passing nullptr tells _Jacobi() to allocate "M" and "max_idx_row").
_Jacobi(n, nullptr, nullptr);
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Jacobi(int n, Scalar **M, int *max_idx_row) {
// Alternative constructor using caller-preallocated workspace arrays.
// (The caller retains ownership; the destructor will not free them.)
_Jacobi(n, M, max_idx_row);
}
// _Jacobi() is the function invoked by the two constructors; it does all of
// the construction work. (Splitting the constructor into multiple functions
// was technically unnecessary, but it makes documenting the code easier.)
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
_Jacobi(int n, Scalar **M, int *max_idx_row) {
  Init();
  if (M == nullptr) {
    // no caller-supplied workspace: allocate "M" and "max_idx_row" ourselves
    is_preallocated = false;
    SetSize(n);
  } else {
    // the caller supplied the workspace, so just record the pointers
    is_preallocated = true;
    this->n = n;
    this->M = M;
    this->max_idx_row = max_idx_row;
    //assert(this->max_idx_row);
  }
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
~Jacobi() {
// Only free the workspace if this object allocated it. (If the caller
// supplied it via the 3-argument constructor, the caller owns it.)
if (! is_preallocated)
Dealloc();
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
int Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Diagonalize(ConstMatrix mat, // the matrix you wish to diagonalize (size n)
Vector eval, // store the eigenvalues here
Matrix evec, // store the eigenvectors here (in rows)
SortCriteria sort_criteria, // sort results?
bool calc_evec, // calculate the eigenvectors?
int max_num_sweeps) // limit the number of iterations ("sweeps")
{
// -- Initialization --
// Only the upper triangle of mat is read (the matrix is assumed symmetric).
for (int i = 0; i < n; i++)
for (int j = i; j < n; j++) //copy the upper triangle of mat[][] into M[][]
M[i][j] = mat[i][j]; //(M[][] is a local copy we can modify)
if (calc_evec)
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
evec[i][j] = (i==j) ? 1.0 : 0.0; //Set evec equal to the identity matrix
for (int i = 0; i < n-1; i++) //Initialize the "max_idx_row[]" array
max_idx_row[i] = MaxEntryRow(M, i); //(which is needed by MaxEntry())
// -- Iteration --
int n_iters;
int max_num_iters = max_num_sweeps*n*(n-1)/2; //"sweep" = n*(n-1)/2 iters
for (n_iters=0; n_iters < max_num_iters; n_iters++) {
int i,j;
MaxEntry(M, i, j); // Find the maximum entry in the matrix. Store in i,j
// If M[i][j] is small compared to M[i][i] and M[j][j], set it to 0.
// (The additions below are exact in floating point only when M[i][j]
// is negligible relative to the diagonal; this is the convergence test.)
if ((M[i][i] + M[i][j] == M[i][i]) && (M[j][j] + M[i][j] == M[j][j])) {
M[i][j] = 0.0;
max_idx_row[i] = MaxEntryRow(M,i); //must also update max_idx_row[i]
}
if (M[i][j] == 0.0)
break; // converged: the largest off-diagonal entry is negligible
// Otherwise, apply a rotation to make M[i][j] = 0
CalcRot(M, i, j); // Calculate the parameters of the rotation matrix.
ApplyRot(M, i, j); // Apply this rotation to the M matrix.
if (calc_evec) // Optional: If the caller wants the eigenvectors, then
ApplyRotLeft(evec,i,j); // apply the rotation to the eigenvector matrix
} //for (int n_iters=0; n_iters < max_num_iters; n_iters++)
// -- Post-processing --
for (int i = 0; i < n; i++)
eval[i] = M[i][i]; // the eigenvalues are the diagonal entries of M
// Optional: Sort results by eigenvalue.
// NOTE(review): SortRows() swaps rows of evec even when calc_evec==false,
// so evec must refer to writable storage in that case too -- confirm callers.
SortRows(eval, evec, n, sort_criteria);
return (n_iters == max_num_iters); // 0 on convergence, 1 on failure
}
/// brief Calculate the components of a rotation matrix which performs a
/// rotation in the i,j plane by an angle (θ) that (when multiplied on
/// both sides) will zero the ij'th element of M, so that afterwards
/// M[i][j] = 0. The results will be stored in c, s, and t
/// (which store cos(θ), sin(θ), and tan(θ), respectively).
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
CalcRot(Scalar const *const *M, //!< matrix
int i, //!< row index
int j) //!< column index
{
t = 1.0; // = tan(θ) (this default is kept when M[i][i] == M[j][j]: θ = 45°)
Scalar M_jj_ii = (M[j][j] - M[i][i]);
if (M_jj_ii != 0.0) {
// kappa = (M[j][j] - M[i][i]) / (2*M[i][j])
Scalar kappa = M_jj_ii;
t = 0.0; // (t remains 0 if M[i][j] == 0: no rotation is needed)
Scalar M_ij = M[i][j];
if (M_ij != 0.0) {
kappa /= (2.0*M_ij);
// t satisfies: t^2 + 2*t*kappa - 1 = 0
// (choose the root which has the smaller absolute value)
t = 1.0 / (std::sqrt(1 + kappa*kappa) + std::abs(kappa));
if (kappa < 0.0)
t = -t;
}
}
//assert(std::abs(t) <= 1.0);
c = 1.0 / std::sqrt(1 + t*t); // cos(θ) recovered from tan(θ)
s = c*t; // sin(θ) = cos(θ)*tan(θ)
}
/// brief Perform a similarity transformation by multiplying matrix M on both
/// sides by a rotation matrix (and its transpose) to eliminate M[i][j].
/// details This rotation matrix performs a rotation in the i,j plane by
/// angle θ. This function assumes that c=cos(θ), s=sin(θ), t=tan(θ)
/// have been calculated previously (using the CalcRot() function).
/// It also assumes that i<j. The max_idx_row[] array is also updated.
/// To save time, since the matrix is symmetric, the elements
/// below the diagonal (ie. M[u][v] where u>v) are not computed.
/// (The lower triangle is instead used as scratch space to back up
/// old values that are still needed later in this function.)
///
/// verbatim
///
/// M' = R^T * M * R
/// where R the rotation in the i,j plane and ^T denotes the transpose.
/// i j
/// _ _
/// | 1 |
/// | . |
/// | . |
/// | 1 |
/// | c ... s |
/// | . . . |
/// R = | . 1 . |
/// | . . . |
/// | -s ... c |
/// | 1 |
/// | . |
/// | . |
/// |_ 1 _|
///
/// endverbatim
///
/// Let M' denote the matrix M after multiplication by R^T and R.
/// The components of M' are:
/// M'_uv = Σ_w Σ_z R_wu * M_wz * R_zv
///
/// note
/// The rotation at location i,j will modify all of the matrix
/// elements containing at least one index which is either i or j
/// such as: M[w][i], M[i][w], M[w][j], M[j][w].
/// Check and see whether these modified matrix elements exceed the
/// corresponding values in max_idx_row[] array for that row.
/// If so, then update max_idx_row for that row.
/// This is somewhat complicated by the fact that we must only consider
/// matrix elements in the upper-right triangle strictly above the diagonal.
/// (ie. matrix elements whose second index is > the first index).
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
ApplyRot(Scalar **M, // matrix
int i, // row index
int j) // column index
{
// Recall that c = cos(θ), s = sin(θ), t = tan(θ) (and |t| <= 1.0)
// Compute the diagonal elements of M which have changed:
M[i][i] -= t * M[i][j];
M[j][j] += t * M[i][j];
//Update the off-diagonal elements of M which will change (above the diagonal)
//assert(i < j);
M[i][j] = 0.0;
//compute M[w][i] and M[i][w] for all w!=i,considering above-diagonal elements
for (int w=0; w < i; w++) { // 0 <= w < i < j < n
M[i][w] = M[w][i]; //backup the previous value. store below diagonal (i>w)
M[w][i] = c*M[w][i] - s*M[w][j]; //M[w][i], M[w][j] from previous iteration
if (i == max_idx_row[w]) max_idx_row[w] = MaxEntryRow(M, w);
else if (std::abs(M[w][i])>std::abs(M[w][max_idx_row[w]])) max_idx_row[w]=i;
//assert(max_idx_row[w] == MaxEntryRow(M, w));
}
for (int w=i+1; w < j; w++) { // 0 <= i < w < j < n
M[w][i] = M[i][w]; //backup the previous value. store below diagonal (w>i)
M[i][w] = c*M[i][w] - s*M[w][j]; //M[i][w], M[w][j] from previous iteration
}
for (int w=j+1; w < n; w++) { // 0 <= i < j+1 <= w < n
M[w][i] = M[i][w]; //backup the previous value. store below diagonal (w>i)
M[i][w] = c*M[i][w] - s*M[j][w]; //M[i][w], M[j][w] from previous iteration
}
// now that we're done modifying row i, we can update max_idx_row[i]
max_idx_row[i] = MaxEntryRow(M, i);
//compute M[w][j] and M[j][w] for all w!=j,considering above-diagonal elements
for (int w=0; w < i; w++) { // 0 <= w < i < j < n
M[w][j] = s*M[i][w] + c*M[w][j]; //M[i][w], M[w][j] from previous iteration
if (j == max_idx_row[w]) max_idx_row[w] = MaxEntryRow(M, w);
else if (std::abs(M[w][j])>std::abs(M[w][max_idx_row[w]])) max_idx_row[w]=j;
//assert(max_idx_row[w] == MaxEntryRow(M, w));
}
for (int w=i+1; w < j; w++) { // 0 <= i+1 <= w < j < n
M[w][j] = s*M[w][i] + c*M[w][j]; //M[w][i], M[w][j] from previous iteration
if (j == max_idx_row[w]) max_idx_row[w] = MaxEntryRow(M, w);
else if (std::abs(M[w][j])>std::abs(M[w][max_idx_row[w]])) max_idx_row[w]=j;
//assert(max_idx_row[w] == MaxEntryRow(M, w));
}
for (int w=j+1; w < n; w++) { // 0 <= i < j < w < n
M[j][w] = s*M[w][i] + c*M[j][w]; //M[w][i], M[j][w] from previous iteration
}
// now that we're done modifying row j, we can update max_idx_row[j]
max_idx_row[j] = MaxEntryRow(M, j);
} //Jacobi::ApplyRot()
/// brief Multiply matrix E on the left by the transpose of the (previously
/// calculated) rotation matrix.
///
/// details
/// The rotation matrix R performs a rotation in the i,j plane by angle θ
/// (the class members "c" and "s" hold cos(θ) and sin(θ), respectively,
/// as computed earlier by CalcRot()).
///
/// verbatim
/// E'_uv = Σ_w R_wu * E_wv
/// endverbatim
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
ApplyRotLeft(Matrix E, // matrix
             int i,    // row index
             int j)    // column index
{
  // Only rows i and j of E change. For each column, mix the two old
  // entries using c = cos(θ) and s = sin(θ):
  for (int col = 0; col < n; col++) {
    const Scalar E_i = E[i][col]; // copy both old values before overwriting
    const Scalar E_j = E[j][col];
    E[i][col] = c*E_i - s*E_j;
    E[j][col] = s*E_i + c*E_j;
  }
}
/// brief Find the column index (>i) in row i whose entry has the largest
/// absolute value. (Only entries strictly above the diagonal are considered.)
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
int Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
MaxEntryRow(Scalar const *const *M, int i) const {
  // linear scan over columns i+1 .. n-1, remembering the biggest so far
  int best = i+1;
  for (int col = i+2; col < n; col++)
    if (std::abs(M[i][col]) > std::abs(M[i][best]))
      best = col;
  return best;
}
/// brief Find the indices (i_max, j_max) marking the location of the
/// entry in the matrix with the largest absolute value. This
/// uses the max_idx_row[] array to find the answer in O(n) time.
/// returns This function does not return a value. However after it is
/// invoked, the location of the largest matrix element will be
/// stored in the i_max and j_max arguments.
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
MaxEntry(Scalar const *const *M, int& i_max, int& j_max) const {
  // Each entry of max_idx_row[] caches the column of the largest entry in
  // its row, so only one candidate per row needs comparing (O(n) total).
  int best_i = 0;
  int best_j = max_idx_row[0];
  Scalar biggest = std::abs(M[best_i][best_j]);
  for (int row = 1; row < n-1; row++) { // (row n-1 has no entries above the diagonal)
    const int col = max_idx_row[row];
    if (std::abs(M[row][col]) > biggest) {
      biggest = std::abs(M[row][col]);
      best_i = row;
      best_j = col;
    }
  }
  i_max = best_i;
  j_max = best_j;
}
/// brief Sort the rows in matrix "evec" according to the numbers in "eval".
/// (Selection sort: O(n^2), fine for the small matrices this class targets.)
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
SortRows(Vector eval, // vector containing the keys used for sorting
         Matrix evec, // matrix whose rows will be sorted according to eval
         int n,       // size of the vector and matrix
         SortCriteria sort_criteria) const // which order (if any)?
{
  // Returns true when key "a" should be placed before key "b" under the
  // requested criteria. (For DO_NOT_SORT nothing is ever preferred, so
  // the loop below performs only harmless self-swaps.)
  auto comes_first = [sort_criteria](Scalar a, Scalar b) -> bool {
    switch (sort_criteria) {
    case SORT_DECREASING_EVALS:     return a > b;
    case SORT_INCREASING_EVALS:     return a < b;
    case SORT_DECREASING_ABS_EVALS: return std::abs(a) > std::abs(b);
    case SORT_INCREASING_ABS_EVALS: return std::abs(a) < std::abs(b);
    default:                        return false;
    }
  };
  // Selection sort on eval[], applying the same row swaps to evec[][].
  for (int i = 0; i < n-1; i++) {
    int i_max = i;
    for (int j = i+1; j < n; j++)
      if (comes_first(eval[j], eval[i_max]))
        i_max = j;
    std::swap(eval[i], eval[i_max]);            // sort "eval"
    for (int k = 0; k < n; k++)
      std::swap(evec[i][k], evec[i_max][k]);    // sort the rows of "evec"
  }
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Init() {
// Put the object into a well-defined empty state (no workspace allocated).
n = 0;
M = nullptr;
max_idx_row = nullptr;
is_preallocated = false;
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
SetSize(int n) {
// Discard any previous workspace and allocate a fresh n x n workspace.
// (The old matrix contents are not preserved.)
Dealloc();
Alloc(n);
}
// Implementation: Jacobi memory management:
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Alloc(int n) {
//assert(! is_preallocated);
this->n = n;
if (n > 0) {
max_idx_row = new int[n];
Alloc2D(n, n, &M); // n x n scratch matrix (holds the local copy in Diagonalize())
}
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Dealloc() {
//assert(! is_preallocated);
Dealloc2D(&M); // (safe to call even if M is already nullptr)
delete[] max_idx_row; // (deleting nullptr is a no-op)
max_idx_row = nullptr;
Init(); // return to the well-defined empty state
}
// Jacobi copy and move constructor, swap, and assignment operator:
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Jacobi(const Jacobi<Scalar, Vector, Matrix, ConstMatrix>& source)
{
// Deep copy. Note: the copy always owns its own workspace, even when
// "source" was built with caller-preallocated arrays (Init() clears
// is_preallocated and SetSize() allocates fresh storage).
Init();
SetSize(source.n);
//assert(n == source.n);
// The following lines aren't really necessary, because the contents
// of source.M and source.max_idx_row are not needed (since they are
// overwritten every time Jacobi::Diagonalize() is invoked).
std::copy(source.max_idx_row,
source.max_idx_row + n,
max_idx_row);
for (int i = 0; i < n; i++)
std::copy(source.M[i],
source.M[i] + n,
M[i]);
}
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
swap(Jacobi<Scalar, Vector, Matrix, ConstMatrix> &other) {
// Exchange all state (size, ownership flag, and workspace pointers).
// Swapping pointers never throws, enabling the copy-swap idiom below.
std::swap(n, other.n);
std::swap(is_preallocated, other.is_preallocated);
std::swap(max_idx_row, other.max_idx_row);
std::swap(M, other.M);
}
// Move constructor (C++11)
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
Jacobi(Jacobi<Scalar, Vector, Matrix, ConstMatrix>&& other) {
// Steal the contents of "other", leaving it in a valid empty state.
Init();
this->swap(other);
}
// Using the "copy-swap" idiom for the assignment operator
template<typename Scalar,typename Vector,typename Matrix,typename ConstMatrix>
Jacobi<Scalar, Vector, Matrix, ConstMatrix>&
Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
operator = (Jacobi<Scalar, Vector, Matrix, ConstMatrix> source) {
// "source" is passed by value (already a copy or a move), so swapping
// with it implements both copy- and move-assignment safely.
this->swap(source);
return *this;
}
// --- Implementation: Eigendecomposition of large matrices ----
template <typename T>
inline LambdaLanczos<T>::LambdaLanczos() {
  // Construct an engine with no matrix attached.  SetSize(), SetMul() and
  // SetInitVec() must be called before run() can be used.
  find_maximum = false;
  matrix_size = 0;
  max_iteration = 0;
}
template <typename T>
inline LambdaLanczos<T>::
LambdaLanczos(std::function<void(const std::vector<T>&,
                                 std::vector<T>&)> mv_mul,
              int matrix_size,
              bool find_maximum)
{
  // Construct a ready-to-run engine.
  //   mv_mul       callback that ACCUMULATES the matrix-vector product
  //                A*in into "out" (see run(), which pre-loads the output)
  //   matrix_size  dimension n of the (square) matrix
  //   find_maximum true -> search for the maximum eigenvalue,
  //                false -> search for the minimum eigenvalue
  // The parameter is taken by value; move it into the member instead of
  // copying the (potentially heap-allocated) std::function a second time.
  this->mv_mul = std::move(mv_mul);
  this->matrix_size = matrix_size;
  this->max_iteration = matrix_size; // default: at most n Lanczos steps
  this->find_maximum = find_maximum;
}
template <typename T>
inline int LambdaLanczos<T>::
run(real_t<T>& eigvalue, std::vector<T>& eigvec) const
{
  // Run the Lanczos iteration on the matrix represented by this->mv_mul.
  // On exit, "eigvalue" holds the extreme eigenvalue (maximum if
  // this->find_maximum, otherwise minimum; the spectral shift
  // this->eigenvalue_offset is undone before returning), and "eigvec" holds
  // the corresponding normalized eigenvector.
  // Returns the number of Lanczos iterations actually performed.
  // NOTE(review): if max_iteration == 0 the loop never runs and "ev" is read
  // uninitialized below -- callers appear to guarantee matrix_size > 0.
  //assert(matrix_size > 0);
  //assert(0 < this->tridiag_eps_ratio && this->tridiag_eps_ratio < 1);
  std::vector<std::vector<T>> u; // Lanczos vectors
  std::vector<real_t<T>> alpha; // Diagonal elements of an approximated tridiagonal matrix
  std::vector<real_t<T>> beta; // Subdiagonal elements of an approximated tridiagonal matrix
  const int n = this->matrix_size;
  u.reserve(this->initial_vector_size);
  alpha.reserve(this->initial_vector_size);
  beta.reserve(this->initial_vector_size);
  // u[0] is a dummy zero vector so that u, alpha and beta share indexing.
  u.emplace_back(n, 0.0); // Same as u.push_back(std::vector<T>(n, 0.0))
  std::vector<T> vk(n, 0.0);
  real_t<T> alphak = 0.0;
  alpha.push_back(alphak);
  real_t<T> betak = 0.0;
  beta.push_back(betak);
  // Starting vector u[1]: filled by the user-supplied initializer,
  // then normalized to unit length.
  std::vector<T> uk(n);
  this->init_vector(uk);
  normalize(uk);
  u.push_back(uk);
  real_t<T> ev, pev; // Calculated eigenvalue and previous one
  pev = std::numeric_limits<real_t<T>>::max();
  int itern = this->max_iteration;
  for (int k = 1;k <= this->max_iteration;k++) {
    // vk = (A + offset*E)uk, here E is the identity matrix
    // (mv_mul is expected to ACCUMULATE A*uk into vk, which is why vk is
    // pre-loaded with offset*uk first).
    for (int i = 0;i < n;i++) {
      vk[i] = uk[i]*this->eigenvalue_offset;
    }
    this->mv_mul(uk, vk);
    alphak = std::real(inner_prod(u.back(), vk));
    // The inner product <uk|vk> is real.
    // Proof:
    //   <uk|vk> = <uk|A|uk>
    //   On the other hand its complex conjugate is
    //   <uk|vk>^* = <vk|uk> = <uk|A^*|uk> = <uk|A|uk>
    //   here the condition that matrix A is a symmetric (Hermitian) is used.
    //   Therefore
    //   <uk|vk> = <vk|uk>^*
    //   <uk|vk> is real.
    alpha.push_back(alphak);
    // Three-term recurrence: next (unnormalized) Lanczos vector.
    for (int i = 0;i < n; i++) {
      uk[i] = vk[i] - betak*u[k-1][i] - alphak*u[k][i];
    }
    // Re-orthogonalize against all previous Lanczos vectors to counteract
    // the loss of orthogonality caused by floating-point roundoff.
    schmidt_orth(uk, u);
    betak = l2_norm(uk);
    beta.push_back(betak);
    // Extreme eigenvalue estimate of the tridiagonal approximation so far.
    if (this->find_maximum) {
      ev = find_maximum_eigenvalue(alpha, beta);
    } else {
      ev = find_minimum_eigenvalue(alpha, beta);
    }
    const real_t<T> zero_threshold = minimum_effective_decimal<real_t<T>>()*1e-1;
    if (betak < zero_threshold) {
      // The Krylov subspace is exhausted (an invariant subspace was found);
      // the eigenvalue estimate is exact and iteration must stop.
      u.push_back(uk);
      // This element will never be accessed,
      // but this "push" guarantees u to always have one more element than
      // alpha and beta do.
      itern = k;
      break;
    }
    normalize(uk);
    u.push_back(uk);
    // Converged once the eigenvalue estimate changes by less than "eps"
    // in relative terms.
    if (abs(ev-pev) < std::min(abs(ev), abs(pev))*this->eps) {
      itern = k;
      break;
    } else {
      pev = ev;
    }
  }
  eigvalue = ev - this->eigenvalue_offset; // undo the spectral shift
  // Reconstruct the eigenvector of the original matrix from the coefficient
  // vector cv of the tridiagonal matrix, computed by backward recurrence.
  int m = alpha.size();
  std::vector<T> cv(m+1);
  cv[0] = 0.0;
  cv[m] = 0.0;
  cv[m-1] = 1.0;
  beta[m-1] = 0.0;
  if (eigvec.size() < n) { // NOTE(review): signed/unsigned comparison (n is int)
    eigvec.resize(n);
  }
  for (int i = 0;i < n;i++) {
    eigvec[i] = cv[m-1]*u[m-1][i];
  }
  for (int k = m-2;k >= 1;k--) {
    cv[k] = ((ev - alpha[k+1])*cv[k+1] - beta[k+1]*cv[k+2])/beta[k];
    for (int i = 0;i < n;i++) {
      eigvec[i] += cv[k]*u[k][i];
    }
  }
  normalize(eigvec);
  return itern;
} //LambdaLancos::run()
template <typename T>
inline void LambdaLanczos<T>::
schmidt_orth(std::vector<T>& uorth, const std::vector<std::vector<T>>& u)
{
  // Gram-Schmidt step: make "uorth" orthogonal to every vector stored in u
  // by subtracting its projection onto each of them in turn.
  // The vectors in u must be normalized; uorth need not be.
  const int n = uorth.size();
  for (const auto& basis : u) {
    const T proj = inner_prod(uorth, basis);
    for (int i = 0; i < n; i++) {
      uorth[i] -= proj * basis[i];
    }
  }
}
template <typename T>
inline real_t<T> LambdaLanczos<T>::
find_minimum_eigenvalue(const std::vector<real_t<T>>& alpha,
                        const std::vector<real_t<T>>& beta) const
{
  // Bisection search for the smallest eigenvalue of the tridiagonal matrix
  // described by (alpha, beta): it is the unique value below which the
  // Sturm-sequence count num_of_eigs_smaller_than() first reaches 1.
  const real_t<T> tol = this->eps * this->tridiag_eps_ratio;
  const real_t<T> bound = tridiagonal_eigen_limit(alpha, beta);
  real_t<T> lo = -bound;
  real_t<T> hi = bound;
  real_t<T> prev_mid = std::numeric_limits<real_t<T>>::max();
  while (hi - lo > std::min(abs(lo), abs(hi))*tol) {
    const real_t<T> mid = (lo + hi)/2.0;
    if (num_of_eigs_smaller_than(mid, alpha, beta) >= 1)
      hi = mid;
    else
      lo = mid;
    if (mid == prev_mid)
      break; // guards against an infinite loop for the zero matrix
    prev_mid = mid;
  }
  return lo; // lo and hi nearly coincide at this point
}
template <typename T>
inline real_t<T> LambdaLanczos<T>::
find_maximum_eigenvalue(const std::vector<real_t<T>>& alpha,
                        const std::vector<real_t<T>>& beta) const
{
  // Bisection search for the largest eigenvalue of the tridiagonal matrix
  // described by (alpha, beta): exactly rank-1 eigenvalues lie below it,
  // where "rank" is the order of the tridiagonal approximation.
  const real_t<T> tol = this->eps * this->tridiag_eps_ratio;
  const real_t<T> bound = tridiagonal_eigen_limit(alpha, beta);
  const int rank = alpha.size() - 1; // number of eigenvalues of the
                                     // approximated tridiagonal matrix
  real_t<T> lo = -bound;
  real_t<T> hi = bound;
  real_t<T> prev_mid = std::numeric_limits<real_t<T>>::max();
  while (hi - lo > std::min(abs(lo), abs(hi))*tol) {
    const real_t<T> mid = (lo + hi)/2.0;
    if (num_of_eigs_smaller_than(mid, alpha, beta) < rank)
      lo = mid;
    else
      hi = mid;
    if (mid == prev_mid)
      break; // guards against an infinite loop for the zero matrix
    prev_mid = mid;
  }
  return lo; // lo and hi nearly coincide at this point
}
/// @brief
/// Compute an upper bound for the absolute value of all eigenvalues of the
/// tridiagonal matrix (alpha, beta) via the Gerschgorin circle theorem.
/// The bound is rough, but that is sufficient: the bisection search that
/// consumes it converges exponentially.
template <typename T>
inline real_t<T> LambdaLanczos<T>::
tridiagonal_eigen_limit(const std::vector<real_t<T>>& alpha,
                        const std::vector<real_t<T>>& beta)
{
  // Every Gerschgorin disc of a tridiagonal matrix lies inside
  // [-(sum|alpha_i| + 2 sum|beta_i|), +(sum|alpha_i| + 2 sum|beta_i|)].
  return l1_norm(alpha) + 2*l1_norm(beta);
}
// Algorithm from
// Peter Arbenz et al.
// "High Performance Algorithms for Structured Matrix Problems"
// Nova Science Publishers, Inc.
template <typename T>
inline int LambdaLanczos<T>::
num_of_eigs_smaller_than(real_t<T> c,
                         const std::vector<real_t<T>>& alpha,
                         const std::vector<real_t<T>>& beta)
{
  // Sturm-sequence count: the number of negative pivots in the LDL^T
  // factorization of (T - c*I) equals the number of eigenvalues of the
  // tridiagonal matrix T that are smaller than c.
  const int m = alpha.size();
  real_t<T> pivot = 1.0;
  int count = 0;
  for (int i = 1; i < m; i++) {
    pivot = alpha[i] - c - beta[i-1]*beta[i-1]/pivot;
    if (pivot < 0)
      count++;
    if (pivot == 0)
      pivot = minimum_effective_decimal<real_t<T>>(); // avoid dividing by zero
  }
  return count;
}
template <typename T>
inline void LambdaLanczos<T>::ChooseOffset() {
  // Choose eigenvalue_offset so that the eigenvalue we want (maximum or
  // minimum, depending on find_maximum) becomes the one with the largest
  // magnitude, which is what the Lanczos iteration converges to.
  const auto n = this->matrix_size;
  std::vector<T> unit_vec_j(n);
  std::vector<T> matrix_column_j(n);
  real_t<T> eval_upper_bound = 0.0;
  /// According to Gershgorin theorem, the maximum (magnitude) eigenvalue should
  /// not exceed max_j{Σ_i|Mij|}. We can infer the contents of each column in
  /// the matrix by multiplying it by different unit vectors. This is slow.
  for (int j = 0; j < n; j++) {
    std::fill(unit_vec_j.begin(), unit_vec_j.end(), 0); // fill with zeros
    unit_vec_j[j] = 1.0; // = jth element is 1, all other elements are 0
    // BUGFIX: mv_mul ACCUMULATES into its output vector (run() pre-loads the
    // output with offset*in before calling it), so the output buffer must be
    // cleared before every call; otherwise columns pile up and the computed
    // bound is wrong.
    std::fill(matrix_column_j.begin(), matrix_column_j.end(), 0);
    // Multiplying the matrix by a unit vector (a vector containing only one
    // non-zero element at position j) extracts the jth column of the matrix.
    this->mv_mul(unit_vec_j, matrix_column_j);
    real_t<T> sum_column = 0.0; // compute Σ_i|Mij|
    for (int i = 0; i < n; i++)
      sum_column += std::abs(matrix_column_j[i]);
    if (eval_upper_bound < sum_column)
      eval_upper_bound = sum_column; // compute max_j{Σ_i|Mij|}
  }
  // Shift the spectrum up (find_maximum) or down (find_minimum) by the bound
  // so that the desired extreme eigenvalue has the largest magnitude.
  if (find_maximum)
    this->eigenvalue_offset = eval_upper_bound;
  else
    this->eigenvalue_offset = -eval_upper_bound;
}
template <typename T>
inline int LambdaLanczos<T>::SetSize(int matrix_size)
{
  // Set the matrix dimension and reset the iteration limit to match it
  // (the same default the constructor uses).  Returns the size that was set.
  this->matrix_size = matrix_size;
  this->max_iteration = matrix_size;
  return matrix_size;
}
template <typename T>
inline void LambdaLanczos<T>::SetMul(std::function<void(const std::vector<T>&, std::vector<T>&)> mv_mul)
{
  // Install the matrix-vector-product callback used by run().  The callback
  // must ACCUMULATE A*in into its "out" argument (run() pre-loads "out").
  // The parameter is taken by value; move it rather than copying the
  // (potentially heap-allocated) std::function a second time.
  this->mv_mul = std::move(mv_mul);
}
template <typename T>
inline void LambdaLanczos<T>::SetInitVec(std::function<void(std::vector<T>&)> init_vector)
{
  // Install the callback that fills the starting Lanczos vector in run().
  // Taken by value; move it instead of copying the std::function again.
  this->init_vector = std::move(init_vector);
}
template <typename T>
inline void LambdaLanczos<T>::SetFindMax(bool find_maximum) {
  // Select which extreme eigenvalue run() searches for:
  // true -> maximum, false -> minimum.
  this->find_maximum = find_maximum;
}
template <typename T>
inline void LambdaLanczos<T>::SetEvalOffset(T offset)
{
  // Set the spectral shift added to every eigenvalue during the iteration;
  // run() subtracts it again before returning.  (See also ChooseOffset().)
  this->eigenvalue_offset = offset;
}
template <typename T>
inline void LambdaLanczos<T>::SetEpsilon(T epsilon)
{
  // Set the relative convergence tolerance used by run().
  this->eps = epsilon;
}
template <typename T>
inline void LambdaLanczos<T>::SetTriEpsRatio(T tri_eps_ratio)
{
  // Set the ratio between the bisection tolerance used for the tridiagonal
  // eigenvalue search and the overall tolerance "eps".  Must lie strictly
  // between 0 and 1 (see the commented-out assertion in run()).
  this->tridiag_eps_ratio = tri_eps_ratio;
}
template <typename T>
inline void VectorRandomInitializer<T>::
init(std::vector<T>& v)
{
  // Fill v with uniform random values drawn from [-1, 1], then rescale it
  // to unit length.  A fresh Mersenne-Twister engine is seeded from the
  // system entropy source on every call.
  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<T> dist((T)(-1.0), (T)(1.0));
  for (auto& elem : v)
    elem = dist(rng);
  normalize(v);
}
template <typename T>
inline void VectorRandomInitializer<std::complex<T>>::
init(std::vector<std::complex<T>>& v)
{
  // Complex specialization: fill v with complex values whose real and
  // imaginary parts are independently uniform on [-1, 1], then normalize.
  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<T> dist((T)(-1.0), (T)(1.0));
  for (auto& elem : v)
    elem = std::complex<T>(dist(rng), dist(rng));
  normalize(v);
}
// --- Implementation of PEigenDense
template<typename Scalar, typename Vector, typename ConstMatrix>
void PEigenDense<Scalar, Vector, ConstMatrix>::
SetSize(int matrix_size) {
  // Set the matrix size and resize the internal workspace vector that
  // receives the computed eigenvector.
  n = matrix_size;
  evec.resize(n);
}
template<typename Scalar, typename Vector, typename ConstMatrix>
PEigenDense<Scalar, Vector, ConstMatrix>::
PEigenDense(int matrix_size):evec(matrix_size) {
  // NOTE(review): evec is sized twice (member initializer and SetSize's
  // resize); redundant but harmless.
  SetSize(matrix_size);
}
template<typename Scalar, typename Vector, typename ConstMatrix>
real_t<Scalar> PEigenDense<Scalar, Vector, ConstMatrix>::
PrincipalEigen(ConstMatrix matrix,
               Vector eigenvector,
               bool find_max)
{
  // Compute the principal (maximum if find_max, otherwise minimum)
  // eigenvalue of the dense n*n "matrix" and store the corresponding
  // normalized eigenvector in "eigenvector".  Returns the eigenvalue.
  //assert(n > 0);
  // Matrix-vector product for the Lanczos engine.  Note that it ACCUMULATES
  // into "out": LambdaLanczos::run() pre-loads "out" with offset*in.
  auto matmul = [&](const std::vector<Scalar>& in, std::vector<Scalar>& out) {
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        out[i] += matrix[i][j]*in[j];
      }
    }
  };
  auto init_vec = [&](std::vector<Scalar>& vec) {
    for (int i = 0; i < n; i++)
      vec[i] = 0.0;
    vec[0] = 1.0;   // deterministic starting vector (1,0,...,0)
  };
  // "ll_engine" calculates the eigenvalue and eigenvector.
  LambdaLanczos<Scalar> ll_engine(matmul, n, find_max);
  // The Lanczos algorithm selects the eigenvalue with the largest magnitude.
  // In order to insure that this is the one we want (maxima or minima), we can
  // add a constant to all of the eigenvalues by setting "eigenvalue_offset"
  // to a Gershgorin bound on the eigenvalue magnitudes: max_i{Σ_j|Mij|}.
  real_t<Scalar> eval_upper_bound = 0.0;   // std::abs() returns the real type
  for (int i = 0; i < n; i++) {
    real_t<Scalar> sum_row = 0.0;
    for (int j = 0; j < n; j++)   // BUGFIX: loop previously incremented "i"
      sum_row += std::abs(matrix[i][j]);   // ("i++"), an infinite loop with
    if (eval_upper_bound < sum_row)        // out-of-bounds reads
      eval_upper_bound = sum_row;
  }
  if (find_max)
    ll_engine.eigenvalue_offset = eval_upper_bound;
  else
    ll_engine.eigenvalue_offset = -eval_upper_bound;
  ll_engine.init_vector = init_vec;
  real_t<Scalar> eval;   // run() takes real_t<Scalar>&, not Scalar
  // This line does all of the hard work:
  ll_engine.run(eval, evec);
  for (int i = 0; i < n; i++)
    eigenvector[i] = evec[i];
  return eval;
}
} //namespace MathEigen
#endif //#ifndef LMP_MATH_EIGEN_IMPL_H