Merge pull request #3579 from akohlmey/linalg-in-cpp

Convert linalg library from Fortran to C++
2023-01-05 21:16:06 -05:00
parent 843cc98531 57790ef35f
commit 3bf527d070
431 changed files with 31249 additions and 63712 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -440,16 +440,12 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_LATTE OR
    find_package(BLAS)
  endif()
  if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG)
-    include(CheckGeneratorSupport)
+    file(GLOB LINALG_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp)
-    if(NOT CMAKE_GENERATOR_SUPPORT_FORTRAN)
+    add_library(linalg STATIC ${LINALG_SOURCES})
      status(FATAL_ERROR "Cannot build internal linear algebra library as CMake build tool lacks Fortran support")
    endif()
    enable_language(Fortran)
    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF])
    add_library(linalg STATIC ${LAPACK_SOURCES})
    set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE})
    set(BLAS_LIBRARIES "$<TARGET_FILE:linalg>")
    set(LAPACK_LIBRARIES "$<TARGET_FILE:linalg>")
    target_link_libraries(lammps PRIVATE linalg)
  else()
    list(APPEND LAPACK_LIBRARIES ${BLAS_LIBRARIES})
  endif()
--- a/cmake/CMakeSettings.json
+++ b/cmake/CMakeSettings.json
@ -72,7 +72,7 @@
      "configurationType": "Debug",
      "buildRoot": "${workspaceRoot}\\build\\${name}",
      "installRoot": "${workspaceRoot}\\install\\${name}",
-      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
+      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
      "buildCommandArgs": "",
      "ctestCommandArgs": "",
      "inheritEnvironments": [ "clang_cl_x64" ],
@ -105,7 +105,7 @@
      "configurationType": "Release",
      "buildRoot": "${workspaceRoot}\\build\\${name}",
      "installRoot": "${workspaceRoot}\\install\\${name}",
-      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
+      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
      "buildCommandArgs": "",
      "ctestCommandArgs": "-V",
      "inheritEnvironments": [ "clang_cl_x64" ],
@ -305,4 +305,4 @@
      ]
    }
  ]
-}
+}
--- a/cmake/presets/windows.cmake
+++ b/cmake/presets/windows.cmake
@ -1,6 +1,7 @@
 set(WIN_PACKAGES
  AMOEBA
  ASPHERE
  AWPMD
  BOCS
  BODY
  BPM
@ -20,6 +21,7 @@ set(WIN_PACKAGES
  DPD-SMOOTH
  DRUDE
  EFF
  ELECTRODE
  EXTRA-COMPUTE
  EXTRA-DUMP
  EXTRA-FIX
@ -35,6 +37,7 @@ set(WIN_PACKAGES
  MEAM
  MISC
  ML-IAP
  ML-POD
  ML-SNAP
  MOFFF
  MOLECULE
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -858,11 +858,11 @@ library.
      .. code-block:: bash
-         $ make lib-latte                          # print help message
+         $ make lib-latte                        # print help message
-         $ make lib-latte args="-b"                # download and build in lib/latte/LATTE-master
+         $ make lib-latte args="-b"              # download and build in lib/latte/LATTE-master
-         $ make lib-latte args="-p $HOME/latte"    # use existing LATTE installation in $HOME/latte
+         $ make lib-latte args="-p $HOME/latte"  # use existing LATTE installation in $HOME/latte
-         $ make lib-latte args="-b -m gfortran"    # download and build in lib/latte and
+         $ make lib-latte args="-b -m gfortran"  # download and build in lib/latte and
-                                                   #   copy Makefile.lammps.gfortran to Makefile.lammps
+                                                 #   copy Makefile.lammps.gfortran to Makefile.lammps
      Note that 3 symbolic (soft) links, ``includelink`` and ``liblink``
      and ``filelink.o``, are created in ``lib/latte`` to point to
@ -1208,10 +1208,10 @@ The ATC package requires the MANYBODY package also be installed.
      .. code-block:: bash
-         $ make lib-linalg                     # print help message
+         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler
 ----------
@ -1259,10 +1259,10 @@ AWPMD package
      .. code-block:: bash
-         $ make lib-linalg                     # print help message
+         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler
 ----------
@ -1363,10 +1363,10 @@ This package depends on the KSPACE package.
      .. code-block:: bash
-         $ make lib-linalg                     # print help message
+         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler
      The package itself is activated with ``make yes-KSPACE`` and
      ``make yes-ELECTRODE``
@ -1447,10 +1447,10 @@ ML-POD package
      .. code-block:: bash
-         $ make lib-linalg                     # print help message
+         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler
      The package itself is activated with ``make yes-ML-POD``.
--- a/lib/atc/Makefile.lammps.linalg
+++ b/lib/atc/Makefile.lammps.linalg
@ -1,6 +1,6 @@
 # Settings that the LAMMPS build will import when this package library is used
 atc_SYSINC =
-atc_SYSLIB = -llinalg -lgfortran
+atc_SYSLIB = -llinalg
 atc_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
--- a/lib/awpmd/Makefile.lammps.linalg
+++ b/lib/awpmd/Makefile.lammps.linalg
@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used
 awpmd_SYSINC =
-awpmd_SYSLIB = -llinalg -lgfortran
+awpmd_SYSLIB = -llinalg
 awpmd_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
--- a/lib/awpmd/ivutils/include/erf.h
+++ b/lib/awpmd/ivutils/include/erf.h
@ -1,19 +0,0 @@
 # ifndef ERF_H
 # define ERF_H
 # ifdef _WIN32
 # ifdef __cplusplus
 extern "C" {
 # endif
 double erf(double x);
 double erfc(double x);
 # ifdef __cplusplus
 }
 # endif
 # endif
 # endif
--- a/lib/awpmd/ivutils/include/lapack_inter.h
+++ b/lib/awpmd/ivutils/include/lapack_inter.h
@ -1,53 +1,36 @@
 // Interface for LAPACK function
-# ifndef LAPACK_INTER_H
+#ifndef LAPACK_INTER_H
-# define LAPACK_INTER_H
+#define LAPACK_INTER_H
 #include <complex>
 typedef int lapack_int;
 typedef complex<float> lapack_complex_float;
 typedef complex<double> lapack_complex_double;
-#if defined(_WIN32) && !defined(__MINGW32__)
+#define DGETRF dgetrf_
 #define DGETRS dgetrs_
 #define DGETRI dgetri_
 #define ZPPTRF zpptrf_
 #define ZPPTRI zpptri_
-  //#define MKL_Complex8 lapack_complex_float
+#ifdef __cplusplus
-  //#define MKL_Complex16 lapack_complex_double
+extern "C" {
-  #include "mkl.h"
+#endif /* __cplusplus */
-
+void dgetrf_(const lapack_int *m, const lapack_int *n, double *a,
-  inline void ZPPTRF( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ) {
+             const lapack_int *lda, lapack_int *ipiv, lapack_int *info);
-    ZPPTRF(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
+void dgetrs_(const char *trans, const lapack_int *n, const lapack_int *nrhs,
-  }
+             const double *a, const lapack_int *lda, const lapack_int *ipiv,
-  inline void ZPPTRI( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ){
+             double *b, const lapack_int *ldb, lapack_int *info);
-     ZPPTRI(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
+void dgetri_(const lapack_int *n, double *a, const lapack_int *lda,
-  }
+             const lapack_int *ipiv, double *work, const lapack_int *lwork,
-
+             lapack_int *info);
-#else
+void zpptrf_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
-
+             lapack_int *info);
-  #define DGETRF dgetrf_
+void zpptri_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
-  #define DGETRS dgetrs_
+             lapack_int *info);
-  #define DGETRI dgetri_
+#ifdef __cplusplus
-  #define ZPPTRF zpptrf_
+}
-  #define ZPPTRI zpptri_
+#endif /* __cplusplus */
  #ifdef __cplusplus
  extern "C" {
  #endif /* __cplusplus */
  void dgetrf_( const lapack_int* m, const lapack_int* n, double* a, const lapack_int* lda,
             lapack_int* ipiv, lapack_int* info );
  void dgetrs_( const char* trans, const lapack_int* n, const lapack_int* nrhs,
             const double* a, const lapack_int* lda, const lapack_int* ipiv,
             double* b, const lapack_int* ldb, lapack_int* info );
  void dgetri_( const lapack_int* n, double* a, const lapack_int* lda,
             const lapack_int* ipiv, double* work, const lapack_int* lwork,
             lapack_int* info );
  void zpptrf_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
             lapack_int* info );
  void zpptri_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
             lapack_int* info );
  #ifdef __cplusplus
  }
  #endif /* __cplusplus */
 #endif
 #endif /* lapack_intER_H */
--- a/lib/awpmd/systems/interact/TCP/wpmd.h
+++ b/lib/awpmd/systems/interact/TCP/wpmd.h
@ -149,10 +149,8 @@
 # include "pairhash.h"
 # include "TCP/tcpdefs.h"
 # include "wavepacket.h"
 # include "erf.h"
 # include "cerf.h"
 using namespace std;
 # include "lapack_inter.h"
--- a/lib/awpmd/systems/interact/TCP/wpmd_split.cpp
+++ b/lib/awpmd/systems/interact/TCP/wpmd_split.cpp
@ -1,6 +1,4 @@
 # include "wpmd_split.h"
 //# include "erf.h"
 void AWPMD_split::resize(int flag){
  for(int s=0;s<2;s++){
--- a/lib/colvars/colvarbias_meta.cpp
+++ b/lib/colvars/colvarbias_meta.cpp
@ -14,7 +14,7 @@
 #include <algorithm>
 // used to set the absolute path of a replica file
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
 #include <direct.h>
 #define CHDIR ::_chdir
 #define GETCWD ::_getcwd
--- a/lib/colvars/colvarproxy.cpp
+++ b/lib/colvars/colvarproxy.cpp
@ -8,10 +8,10 @@
 // Colvars repository at GitHub.
 // Using access() to check if a file exists (until we can assume C++14/17)
-#if !defined(WIN32) || defined(__CYGWIN__)
+#if !defined(_WIN32) || defined(__CYGWIN__)
 #include <unistd.h>
 #endif
-#if defined(WIN32)
+#if defined(_WIN32)
 #include <io.h>
 #endif
@ -678,7 +678,7 @@ int colvarproxy_io::backup_file(char const *filename)
  // Simplified version of NAMD_file_exists()
  int exit_code;
  do {
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
    // We could use _access_s here, but it is probably too new
    exit_code = _access(filename, 00);
 #else
@ -708,7 +708,7 @@ int colvarproxy_io::backup_file(char const *filename)
 int colvarproxy_io::remove_file(char const *filename)
 {
  int error_code = COLVARS_OK;
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
  // Because the file may be open by other processes, rename it to filename.old
  std::string const renamed_file(std::string(filename)+".old");
  // It may still be there from an interrupted run, so remove it to be safe
@ -741,7 +741,7 @@ int colvarproxy_io::remove_file(char const *filename)
 int colvarproxy_io::rename_file(char const *filename, char const *newfilename)
 {
  int error_code = COLVARS_OK;
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
  // On straight Windows, must remove the destination before renaming it
  error_code |= remove_file(newfilename);
 #endif
--- a/lib/electrode/Makefile.lammps.linalg
+++ b/lib/electrode/Makefile.lammps.linalg
@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used
 electrode_SYSINC =
-electrode_SYSLIB = -llinalg -lgfortran
+electrode_SYSLIB = -llinalg
 electrode_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
--- a/lib/linalg/Makefile.gfortran
+++ b/lib/linalg/Makefile.gfortran
@ -6,21 +6,17 @@ SHELL = /bin/sh
 # ------ FILES ------
-SRC =	$(wildcard *.f)
+SRC =	$(wildcard *.cpp)
 SRC1 =	$(wildcard *.F)
 FILES = $(SRC) $(SRC1) Makefile.* README
 # ------ DEFINITIONS ------
 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
+OBJ =   $(SRC:.cpp=.o)
 # ------ SETTINGS ------
-FC =           gfortran
+CXX =           g++ -std=c++11
-FFLAGS =      -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore
+CCFLAGS =       -O3 -fPIC -ffast-math -fstrict-aliasing
 FFLAGS0 =     -O0 -fPIC -fno-second-underscore
 ARCHIVE =	ar
 AR =	ar
 ARCHFLAG =	-rcs
@ -34,20 +30,11 @@ lib: 	$(OBJ)
 # ------ COMPILE RULES ------
-%.o:%.F
+%.o:%.cpp
-	$(FC) $(FFLAGS) -c $<
+	$(CC) $(CCFLAGS) -c $<
 %.o:%.f
 	$(FC) $(FFLAGS) -c $<
 dlamch.o: dlamch.f
 	$(FC) $(FFLAGS0) -c $<
 # ------ CLEAN ------
 clean:
-	-rm -f *.o *.mod *~ $(LIB)
+	-rm -f *.o *~ $(LIB)
 tar:
 	-tar -czvf ../linalg.tar.gz $(FILES)
--- a/lib/linalg/Makefile.mpi
+++ b/lib/linalg/Makefile.mpi
@ -6,21 +6,17 @@ SHELL = /bin/sh
 # ------ FILES ------
-SRC =	$(wildcard *.f)
+SRC =	$(wildcard *.cpp)
 SRC1 =	$(wildcard *.F)
 FILES = $(SRC) $(SRC1) Makefile.* README
 # ------ DEFINITIONS ------
 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
+OBJ =   $(SRC:.cpp=.o)
 # ------ SETTINGS ------
-FC =          mpifort
+CC =          mpicxx
-FFLAGS =      -O3 -fPIC
+CCFLAGS =      -O3 -fPIC
 FFLAGS0 =     -O0 -fPIC
 ARCHIVE =	ar
 AR =	ar
 ARCHFLAG =	-rcs
@ -34,20 +30,11 @@ lib: 	$(OBJ)
 # ------ COMPILE RULES ------
-%.o:%.F
+%.o:%.cpp
-	$(FC) $(FFLAGS) -c $<
+	$(CC) $(CCFLAGS) -c $<
 %.o:%.f
 	$(FC) $(FFLAGS) -c $<
 dlamch.o: dlamch.f
 	$(FC) $(FFLAGS0) -c $<
 # ------ CLEAN ------
 clean:
-	-rm -f *.o *.mod *~ $(LIB)
+	-rm -f *.o *~ $(LIB)
 tar:
 	-tar -czvf ../linalg.tar.gz $(FILES)
--- a/lib/linalg/Makefile.serial
+++ b/lib/linalg/Makefile.serial
@ -1 +1 @@
-Makefile.gfortran
+Makefile.g++
--- a/lib/linalg/README
+++ b/lib/linalg/README
@ -1,7 +1,13 @@
 This directory has generic BLAS and LAPACK source files needed by the
 ATC, AWPMD, ELECTRODE, LATTE, and ML-POD packages (and possibly by other
 packages) in the future that can be used instead of platform or vendor
-optimized BLAS/LAPACK library.
+optimized BLAS/LAPACK library.  To simplify installation, these files
 have been translated from the Fortran versions of the BLAS and LAPACK
 references source files at https://netlib.org/lapack/ to C++ with f2c.
 The package with the tools to do the translation and the matching
 original Fortran sources are at https://github.com/lammps/linalg.
 Please note that even through the files are C++ source code the
 resulting library will follow the Fortran binary conventions.
 Note that this is an *incomplete* subset of full BLAS/LAPACK.
@ -20,7 +26,7 @@ can do it manually by following the instructions below.
 Build the library using one of the provided Makefile.* files or create
 your own, specific to your compiler and system.  For example:
-make -f Makefile.gfortran
+make -f Makefile.g++
 When you are done building this library, one file should exist in this
 directory:
--- a/lib/linalg/d_lmp_cnjg.cpp
+++ b/lib/linalg/d_lmp_cnjg.cpp
@ -0,0 +1,13 @@
 #include "lmp_f2c.h"
 extern "C" {
 void d_lmp_cnjg(doublecomplex *r, doublecomplex *z)
 {
    doublereal zi = z->i;
    r->r = z->r;
    r->i = -zi;
 }
 }
--- a/lib/linalg/d_lmp_imag.cpp
+++ b/lib/linalg/d_lmp_imag.cpp
@ -0,0 +1,10 @@
 #include "lmp_f2c.h"
 extern "C" {
 double d_lmp_imag(doublecomplex *z)
 {
    return (z->i);
 }
 }
--- a/lib/linalg/d_lmp_lg10.cpp
+++ b/lib/linalg/d_lmp_lg10.cpp
@ -0,0 +1,14 @@
 #include "lmp_f2c.h"
 #undef abs
 static constexpr double log10e = 0.43429448190325182765;
 #include <cmath>
 extern "C" {
 double d_lmp_lg10(doublereal *x)
 {
    return (log10e * log(*x));
 }
 }
--- a/lib/linalg/d_lmp_sign.cpp
+++ b/lib/linalg/d_lmp_sign.cpp
@ -0,0 +1,12 @@
 #include "lmp_f2c.h"
 extern "C" {
 double d_lmp_sign(doublereal *a, doublereal *b)
 {
    double x;
    x = (*a >= 0 ? *a : -*a);
    return (*b >= 0 ? x : -x);
 }
 }
--- a/lib/linalg/dasum.cpp
+++ b/lib/linalg/dasum.cpp
@ -0,0 +1,50 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 doublereal dasum_(integer *n, doublereal *dx, integer *incx)
 {
    integer i__1, i__2;
    doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
    integer i__, m, mp1;
    doublereal dtemp;
    integer nincx;
    --dx;
    ret_val = 0.;
    dtemp = 0.;
    if (*n <= 0 || *incx <= 0) {
        return ret_val;
    }
    if (*incx == 1) {
        m = *n % 6;
        if (m != 0) {
            i__1 = m;
            for (i__ = 1; i__ <= i__1; ++i__) {
                dtemp += (d__1 = dx[i__], abs(d__1));
            }
            if (*n < 6) {
                ret_val = dtemp;
                return ret_val;
            }
        }
        mp1 = m + 1;
        i__1 = *n;
        for (i__ = mp1; i__ <= i__1; i__ += 6) {
            dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], abs(d__2)) +
                    (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + 3], abs(d__4)) +
                    (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = dx[i__ + 5], abs(d__6));
        }
    } else {
        nincx = *n * *incx;
        i__1 = nincx;
        i__2 = *incx;
        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
            dtemp += (d__1 = dx[i__], abs(d__1));
        }
    }
    ret_val = dtemp;
    return ret_val;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dasum.f
+++ b/lib/linalg/dasum.f
@ -1,131 +0,0 @@
 *> \brief \b DASUM
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
 *
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>    DASUM takes the sum of the absolute values.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         number of elements in input vector(s)
 *> \endverbatim
 *>
 *> \param[in] DX
 *> \verbatim
 *>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>         storage spacing between elements of DX
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level1
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>     jack dongarra, linpack, 3/11/78.
 *>     modified 3/93 to return if incx .le. 0.
 *>     modified 12/3/93, array(1) declarations changed to array(*)
 *> \endverbatim
 *>
 *  =====================================================================
      DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
 *
 *  -- Reference BLAS level1 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER INCX,N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION DX(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      DOUBLE PRECISION DTEMP
      INTEGER I,M,MP1,NINCX
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC DABS,MOD
 *     ..
      DASUM = 0.0d0
      DTEMP = 0.0d0
      IF (N.LE.0 .OR. INCX.LE.0) RETURN
      IF (INCX.EQ.1) THEN
 *        code for increment equal to 1
 *
 *
 *        clean-up loop
 *
         M = MOD(N,6)
         IF (M.NE.0) THEN
            DO I = 1,M
               DTEMP = DTEMP + DABS(DX(I))
            END DO
            IF (N.LT.6) THEN
               DASUM = DTEMP
               RETURN
            END IF
         END IF
         MP1 = M + 1
         DO I = MP1,N,6
            DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
     $              DABS(DX(I+2)) + DABS(DX(I+3)) +
     $              DABS(DX(I+4)) + DABS(DX(I+5))
         END DO
      ELSE
 *
 *        code for increment not equal to 1
 *
         NINCX = N*INCX
         DO I = 1,NINCX,INCX
            DTEMP = DTEMP + DABS(DX(I))
         END DO
      END IF
      DASUM = DTEMP
      RETURN
 *
 *     End of DASUM
 *
      END
--- a/lib/linalg/daxpy.cpp
+++ b/lib/linalg/daxpy.cpp
@ -0,0 +1,56 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
 {
    integer i__1;
    integer i__, m, ix, iy, mp1;
    --dy;
    --dx;
    if (*n <= 0) {
        return 0;
    }
    if (*da == 0.) {
        return 0;
    }
    if (*incx == 1 && *incy == 1) {
        m = *n % 4;
        if (m != 0) {
            i__1 = m;
            for (i__ = 1; i__ <= i__1; ++i__) {
                dy[i__] += *da * dx[i__];
            }
        }
        if (*n < 4) {
            return 0;
        }
        mp1 = m + 1;
        i__1 = *n;
        for (i__ = mp1; i__ <= i__1; i__ += 4) {
            dy[i__] += *da * dx[i__];
            dy[i__ + 1] += *da * dx[i__ + 1];
            dy[i__ + 2] += *da * dx[i__ + 2];
            dy[i__ + 3] += *da * dx[i__ + 3];
        }
    } else {
        ix = 1;
        iy = 1;
        if (*incx < 0) {
            ix = (-(*n) + 1) * *incx + 1;
        }
        if (*incy < 0) {
            iy = (-(*n) + 1) * *incy + 1;
        }
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            dy[iy] += *da * dx[ix];
            ix += *incx;
            iy += *incy;
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/daxpy.f
+++ b/lib/linalg/daxpy.f
@ -1,152 +0,0 @@
 *> \brief \b DAXPY
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION DA
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>    DAXPY constant times a vector plus a vector.
 *>    uses unrolled loops for increments equal to one.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         number of elements in input vector(s)
 *> \endverbatim
 *>
 *> \param[in] DA
 *> \verbatim
 *>          DA is DOUBLE PRECISION
 *>           On entry, DA specifies the scalar alpha.
 *> \endverbatim
 *>
 *> \param[in] DX
 *> \verbatim
 *>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>         storage spacing between elements of DX
 *> \endverbatim
 *>
 *> \param[in,out] DY
 *> \verbatim
 *>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
 *> \endverbatim
 *>
 *> \param[in] INCY
 *> \verbatim
 *>          INCY is INTEGER
 *>         storage spacing between elements of DY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level1
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>     jack dongarra, linpack, 3/11/78.
 *>     modified 12/3/93, array(1) declarations changed to array(*)
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
 *
 *  -- Reference BLAS level1 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION DA
      INTEGER INCX,INCY,N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION DX(*),DY(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      INTEGER I,IX,IY,M,MP1
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MOD
 *     ..
      IF (N.LE.0) RETURN
      IF (DA.EQ.0.0d0) RETURN
      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
 *
 *        code for both increments equal to 1
 *
 *
 *        clean-up loop
 *
         M = MOD(N,4)
         IF (M.NE.0) THEN
            DO I = 1,M
               DY(I) = DY(I) + DA*DX(I)
            END DO
         END IF
         IF (N.LT.4) RETURN
         MP1 = M + 1
         DO I = MP1,N,4
            DY(I) = DY(I) + DA*DX(I)
            DY(I+1) = DY(I+1) + DA*DX(I+1)
            DY(I+2) = DY(I+2) + DA*DX(I+2)
            DY(I+3) = DY(I+3) + DA*DX(I+3)
         END DO
      ELSE
 *
 *        code for unequal increments or equal increments
 *          not equal to 1
 *
         IX = 1
         IY = 1
         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
         DO I = 1,N
          DY(IY) = DY(IY) + DA*DX(IX)
          IX = IX + INCX
          IY = IY + INCY
         END DO
      END IF
      RETURN
 *
 *     End of DAXPY
 *
      END
--- a/lib/linalg/dbdsqr.cpp
+++ b/lib/linalg/dbdsqr.cpp
@ -0,0 +1,522 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static doublereal c_b15 = -.125;
 static integer c__1 = 1;
 static doublereal c_b49 = 1.;
 static doublereal c_b72 = -1.;
 int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, doublereal *d__,
            doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu,
            doublereal *c__, integer *ldc, doublereal *work, integer *info, ftnlen uplo_len)
 {
    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
    doublereal d__1, d__2, d__3, d__4;
    double pow_lmp_dd(doublereal *, doublereal *), sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
    integer iterdivn;
    doublereal f, g, h__;
    integer i__, j, m;
    doublereal r__;
    integer maxitdivn;
    doublereal cs;
    integer ll;
    doublereal sn, mu;
    integer nm1, nm12, nm13, lll;
    doublereal eps, sll, tol, abse;
    integer idir;
    doublereal abss;
    integer oldm;
    doublereal cosl;
    integer isub, iter;
    doublereal unfl, sinl, cosr, smin, smax, sinr;
    extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
                     doublereal *),
        dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
        dscal_(integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    doublereal oldcs;
    extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
                      doublereal *, integer *, ftnlen, ftnlen, ftnlen);
    integer oldll;
    doublereal shift, sigmn, oldsn;
    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
    doublereal sminl, sigmx;
    logical lower;
    extern int dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *),
        dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
                doublereal *, doublereal *, doublereal *);
    extern doublereal dlamch_(char *, ftnlen);
    extern int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
        xerbla_(char *, integer *, ftnlen);
    doublereal sminoa, thresh;
    logical rotate;
    doublereal tolmul;
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;
    *info = 0;
    lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1);
    if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lower) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*ncvt < 0) {
        *info = -3;
    } else if (*nru < 0) {
        *info = -4;
    } else if (*ncc < 0) {
        *info = -5;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1, *n)) {
        *info = -9;
    } else if (*ldu < max(1, *nru)) {
        *info = -11;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1, *n)) {
        *info = -13;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DBDSQR", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    if (*n == 1) {
        goto L160;
    }
    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
    if (!rotate) {
        dlasq1_(n, &d__[1], &e[1], &work[1], info);
        if (*info != 2) {
            return 0;
        }
        *info = 0;
    }
    nm1 = *n - 1;
    nm12 = nm1 + nm1;
    nm13 = nm12 + nm1;
    idir = 0;
    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
    unfl = dlamch_((char *)"Safe minimum", (ftnlen)12);
    if (lower) {
        i__1 = *n - 1;
        for (i__ = 1; i__ <= i__1; ++i__) {
            dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
            d__[i__] = r__;
            e[i__] = sn * d__[i__ + 1];
            d__[i__ + 1] = cs * d__[i__ + 1];
            work[i__] = cs;
            work[nm1 + i__] = sn;
        }
        if (*nru > 0) {
            dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1,
                   (ftnlen)1, (ftnlen)1);
        }
        if (*ncc > 0) {
            dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc, (ftnlen)1,
                   (ftnlen)1, (ftnlen)1);
        }
    }
    d__3 = 100., d__4 = pow_lmp_dd(&eps, &c_b15);
    d__1 = 10., d__2 = min(d__3, d__4);
    tolmul = max(d__1, d__2);
    tol = tolmul * eps;
    smax = 0.;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
        smax = max(d__2, d__3);
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
        smax = max(d__2, d__3);
    }
    sminl = 0.;
    if (tol >= 0.) {
        sminoa = abs(d__[1]);
        if (sminoa == 0.) {
            goto L50;
        }
        mu = sminoa;
        i__1 = *n;
        for (i__ = 2; i__ <= i__1; ++i__) {
            mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1], abs(d__1))));
            sminoa = min(sminoa, mu);
            if (sminoa == 0.) {
                goto L50;
            }
        }
    L50:
        sminoa /= sqrt((doublereal)(*n));
        d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6;
        thresh = max(d__1, d__2);
    } else {
        d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6;
        thresh = max(d__1, d__2);
    }
    maxitdivn = *n * 6;
    iterdivn = 0;
    iter = -1;
    oldll = -1;
    oldm = -1;
    m = *n;
 L60:
    if (m <= 1) {
        goto L160;
    }
    if (iter >= *n) {
        iter -= *n;
        ++iterdivn;
        if (iterdivn >= maxitdivn) {
            goto L200;
        }
    }
    if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
        d__[m] = 0.;
    }
    smax = (d__1 = d__[m], abs(d__1));
    smin = smax;
    i__1 = m - 1;
    for (lll = 1; lll <= i__1; ++lll) {
        ll = m - lll;
        abss = (d__1 = d__[ll], abs(d__1));
        abse = (d__1 = e[ll], abs(d__1));
        if (tol < 0. && abss <= thresh) {
            d__[ll] = 0.;
        }
        if (abse <= thresh) {
            goto L80;
        }
        smin = min(smin, abss);
        d__1 = max(smax, abss);
        smax = max(d__1, abse);
    }
    ll = 0;
    goto L90;
 L80:
    e[ll] = 0.;
    if (ll == m - 1) {
        --m;
        goto L60;
    }
 L90:
    ++ll;
    if (ll == m - 1) {
        dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl);
        d__[m - 1] = sigmx;
        e[m - 1] = 0.;
        d__[m] = sigmn;
        if (*ncvt > 0) {
            drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &cosr, &sinr);
        }
        if (*nru > 0) {
            drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &c__1, &cosl, &sinl);
        }
        if (*ncc > 0) {
            drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &cosl, &sinl);
        }
        m += -2;
        goto L60;
    }
    if (ll > oldm || m < oldll) {
        if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
            idir = 1;
        } else {
            idir = 2;
        }
    }
    if (idir == 1) {
        if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(d__1)) ||
            tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) {
            e[m - 1] = 0.;
            goto L60;
        }
        if (tol >= 0.) {
            mu = (d__1 = d__[ll], abs(d__1));
            sminl = mu;
            i__1 = m - 1;
            for (lll = ll; lll <= i__1; ++lll) {
                if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
                    e[lll] = 0.;
                    goto L60;
                }
                mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
                sminl = min(sminl, mu);
            }
        }
    } else {
        if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)) ||
            tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
            e[ll] = 0.;
            goto L60;
        }
        if (tol >= 0.) {
            mu = (d__1 = d__[m], abs(d__1));
            sminl = mu;
            i__1 = ll;
            for (lll = m - 1; lll >= i__1; --lll) {
                if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
                    e[lll] = 0.;
                    goto L60;
                }
                mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
                sminl = min(sminl, mu);
            }
        }
    }
    oldll = ll;
    oldm = m;
    d__1 = eps, d__2 = tol * .01;
    if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1, d__2)) {
        shift = 0.;
    } else {
        if (idir == 1) {
            sll = (d__1 = d__[ll], abs(d__1));
            dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
        } else {
            sll = (d__1 = d__[m], abs(d__1));
            dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
        }
        if (sll > 0.) {
            d__1 = shift / sll;
            if (d__1 * d__1 < eps) {
                shift = 0.;
            }
        }
    }
    iter = iter + m - ll;
    if (shift == 0.) {
        if (idir == 1) {
            cs = 1.;
            oldcs = 1.;
            i__1 = m - 1;
            for (i__ = ll; i__ <= i__1; ++i__) {
                d__1 = d__[i__] * cs;
                dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
                if (i__ > ll) {
                    e[i__ - 1] = oldsn * r__;
                }
                d__1 = oldcs * r__;
                d__2 = d__[i__ + 1] * sn;
                dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
                work[i__ - ll + 1] = cs;
                work[i__ - ll + 1 + nm1] = sn;
                work[i__ - ll + 1 + nm12] = oldcs;
                work[i__ - ll + 1 + nm13] = oldsn;
            }
            h__ = d__[m] * cs;
            d__[m] = h__ * oldcs;
            e[m - 1] = h__ * oldsn;
            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
                       &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
                       &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
                e[m - 1] = 0.;
            }
        } else {
            cs = 1.;
            oldcs = 1.;
            i__1 = ll + 1;
            for (i__ = m; i__ >= i__1; --i__) {
                d__1 = d__[i__] * cs;
                dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
                if (i__ < m) {
                    e[i__] = oldsn * r__;
                }
                d__1 = oldcs * r__;
                d__2 = d__[i__ - 1] * sn;
                dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
                work[i__ - ll] = cs;
                work[i__ - ll + nm1] = -sn;
                work[i__ - ll + nm12] = oldcs;
                work[i__ - ll + nm13] = -oldsn;
            }
            h__ = d__[ll] * cs;
            d__[ll] = h__ * oldcs;
            e[ll] = h__ * oldsn;
            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
                       &vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if ((d__1 = e[ll], abs(d__1)) <= thresh) {
                e[ll] = 0.;
            }
        }
    } else {
        if (idir == 1) {
            f = ((d__1 = d__[ll], abs(d__1)) - shift) *
                (d_lmp_sign(&c_b49, &d__[ll]) + shift / d__[ll]);
            g = e[ll];
            i__1 = m - 1;
            for (i__ = ll; i__ <= i__1; ++i__) {
                dlartg_(&f, &g, &cosr, &sinr, &r__);
                if (i__ > ll) {
                    e[i__ - 1] = r__;
                }
                f = cosr * d__[i__] + sinr * e[i__];
                e[i__] = cosr * e[i__] - sinr * d__[i__];
                g = sinr * d__[i__ + 1];
                d__[i__ + 1] = cosr * d__[i__ + 1];
                dlartg_(&f, &g, &cosl, &sinl, &r__);
                d__[i__] = r__;
                f = cosl * e[i__] + sinl * d__[i__ + 1];
                d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
                if (i__ < m - 1) {
                    g = sinl * e[i__ + 1];
                    e[i__ + 1] = cosl * e[i__ + 1];
                }
                work[i__ - ll + 1] = cosr;
                work[i__ - ll + 1 + nm1] = sinr;
                work[i__ - ll + 1 + nm12] = cosl;
                work[i__ - ll + 1 + nm13] = sinl;
            }
            e[m - 1] = f;
            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
                       &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
                       &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
                e[m - 1] = 0.;
            }
        } else {
            f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_lmp_sign(&c_b49, &d__[m]) + shift / d__[m]);
            g = e[m - 1];
            i__1 = ll + 1;
            for (i__ = m; i__ >= i__1; --i__) {
                dlartg_(&f, &g, &cosr, &sinr, &r__);
                if (i__ < m) {
                    e[i__] = r__;
                }
                f = cosr * d__[i__] + sinr * e[i__ - 1];
                e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
                g = sinr * d__[i__ - 1];
                d__[i__ - 1] = cosr * d__[i__ - 1];
                dlartg_(&f, &g, &cosl, &sinl, &r__);
                d__[i__] = r__;
                f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
                d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
                if (i__ > ll + 1) {
                    g = sinl * e[i__ - 2];
                    e[i__ - 2] = cosl * e[i__ - 2];
                }
                work[i__ - ll] = cosr;
                work[i__ - ll + nm1] = -sinr;
                work[i__ - ll + nm12] = cosl;
                work[i__ - ll + nm13] = -sinl;
            }
            e[ll] = f;
            if ((d__1 = e[ll], abs(d__1)) <= thresh) {
                e[ll] = 0.;
            }
            if (*ncvt > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
                       &vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*nru > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
            if (*ncc > 0) {
                i__1 = m - ll + 1;
                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
            }
        }
    }
    goto L60;
 L160:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (d__[i__] < 0.) {
            d__[i__] = -d__[i__];
            if (*ncvt > 0) {
                dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
            }
        }
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        isub = 1;
        smin = d__[1];
        i__2 = *n + 1 - i__;
        for (j = 2; j <= i__2; ++j) {
            if (d__[j] <= smin) {
                isub = j;
                smin = d__[j];
            }
        }
        if (isub != *n + 1 - i__) {
            d__[isub] = d__[*n + 1 - i__];
            d__[*n + 1 - i__] = smin;
            if (*ncvt > 0) {
                dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt);
            }
            if (*nru > 0) {
                dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1);
            }
            if (*ncc > 0) {
                dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc);
            }
        }
    }
    goto L220;
 L200:
    *info = 0;
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (e[i__] != 0.) {
            ++(*info);
        }
    }
 L220:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dbdsqr.f
+++ b/lib/linalg/dbdsqr.f
@ -1,864 +0,0 @@
 *> \brief \b DBDSQR
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DBDSQR + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
 *                          LDU, C, LDC, WORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
 *      $                   VT( LDVT, * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DBDSQR computes the singular values and, optionally, the right and/or
 *> left singular vectors from the singular value decomposition (SVD) of
 *> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
 *> zero-shift QR algorithm.  The SVD of B has the form
 *>
 *>    B = Q * S * P**T
 *>
 *> where S is the diagonal matrix of singular values, Q is an orthogonal
 *> matrix of left singular vectors, and P is an orthogonal matrix of
 *> right singular vectors.  If left singular vectors are requested, this
 *> subroutine actually returns U*Q instead of Q, and, if right singular
 *> vectors are requested, this subroutine returns P**T*VT instead of
 *> P**T, for given real input matrices U and VT.  When U and VT are the
 *> orthogonal matrices that reduce a general matrix A to bidiagonal
 *> form:  A = U*B*VT, as computed by DGEBRD, then
 *>
 *>    A = (U*Q) * S * (P**T*VT)
 *>
 *> is the SVD of A.  Optionally, the subroutine may also compute Q**T*C
 *> for a given real input matrix C.
 *>
 *> See "Computing  Small Singular Values of Bidiagonal Matrices With
 *> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
 *> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
 *> no. 5, pp. 873-912, Sept 1990) and
 *> "Accurate singular values and differential qd algorithms," by
 *> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
 *> Department, University of California at Berkeley, July 1992
 *> for a detailed description of the algorithm.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          = 'U':  B is upper bidiagonal;
 *>          = 'L':  B is lower bidiagonal.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix B.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NCVT
 *> \verbatim
 *>          NCVT is INTEGER
 *>          The number of columns of the matrix VT. NCVT >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRU
 *> \verbatim
 *>          NRU is INTEGER
 *>          The number of rows of the matrix U. NRU >= 0.
 *> \endverbatim
 *>
 *> \param[in] NCC
 *> \verbatim
 *>          NCC is INTEGER
 *>          The number of columns of the matrix C. NCC >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>          On entry, the n diagonal elements of the bidiagonal matrix B.
 *>          On exit, if INFO=0, the singular values of B in decreasing
 *>          order.
 *> \endverbatim
 *>
 *> \param[in,out] E
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (N-1)
 *>          On entry, the N-1 offdiagonal elements of the bidiagonal
 *>          matrix B.
 *>          On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
 *>          will contain the diagonal and superdiagonal elements of a
 *>          bidiagonal matrix orthogonally equivalent to the one given
 *>          as input.
 *> \endverbatim
 *>
 *> \param[in,out] VT
 *> \verbatim
 *>          VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
 *>          On entry, an N-by-NCVT matrix VT.
 *>          On exit, VT is overwritten by P**T * VT.
 *>          Not referenced if NCVT = 0.
 *> \endverbatim
 *>
 *> \param[in] LDVT
 *> \verbatim
 *>          LDVT is INTEGER
 *>          The leading dimension of the array VT.
 *>          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
 *> \endverbatim
 *>
 *> \param[in,out] U
 *> \verbatim
 *>          U is DOUBLE PRECISION array, dimension (LDU, N)
 *>          On entry, an NRU-by-N matrix U.
 *>          On exit, U is overwritten by U * Q.
 *>          Not referenced if NRU = 0.
 *> \endverbatim
 *>
 *> \param[in] LDU
 *> \verbatim
 *>          LDU is INTEGER
 *>          The leading dimension of the array U.  LDU >= max(1,NRU).
 *> \endverbatim
 *>
 *> \param[in,out] C
 *> \verbatim
 *>          C is DOUBLE PRECISION array, dimension (LDC, NCC)
 *>          On entry, an N-by-NCC matrix C.
 *>          On exit, C is overwritten by Q**T * C.
 *>          Not referenced if NCC = 0.
 *> \endverbatim
 *>
 *> \param[in] LDC
 *> \verbatim
 *>          LDC is INTEGER
 *>          The leading dimension of the array C.
 *>          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (4*(N-1))
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  If INFO = -i, the i-th argument had an illegal value
 *>          > 0:
 *>             if NCVT = NRU = NCC = 0,
 *>                = 1, a split was marked by a positive value in E
 *>                = 2, current block of Z not diagonalized after 30*N
 *>                     iterations (in inner while loop)
 *>                = 3, termination criterion of outer while loop not met
 *>                     (program created more than N unreduced blocks)
 *>             else NCVT = NRU = NCC = 0,
 *>                   the algorithm did not converge; D and E contain the
 *>                   elements of a bidiagonal matrix which is orthogonally
 *>                   similar to the input matrix B;  if INFO = i, i
 *>                   elements of E have not converged to zero.
 *> \endverbatim
 *
 *> \par Internal Parameters:
 *  =========================
 *>
 *> \verbatim
 *>  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
 *>          TOLMUL controls the convergence criterion of the QR loop.
 *>          If it is positive, TOLMUL*EPS is the desired relative
 *>             precision in the computed singular values.
 *>          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
 *>             desired absolute accuracy in the computed singular
 *>             values (corresponds to relative accuracy
 *>             abs(TOLMUL*EPS) in the largest singular value.
 *>          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
 *>             between 10 (for fast convergence) and .1/EPS
 *>             (for there to be some accuracy in the results).
 *>          Default is to lose at either one eighth or 2 of the
 *>             available decimal digits in each computed singular value
 *>             (whichever is smaller).
 *>
 *>  MAXITR  INTEGER, default = 6
 *>          MAXITR controls the maximum number of passes of the
 *>          algorithm through its inner loop. The algorithms stops
 *>          (and so fails to converge) if the number of passes
 *>          through the inner loop exceeds MAXITR*N**2.
 *>
 *> \endverbatim
 *
 *> \par Note:
 *  ===========
 *>
 *> \verbatim
 *>  Bug report from Cezary Dendek.
 *>  On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is
 *>  removed since it can overflow pretty easily (for N larger or equal
 *>  than 18,919). We instead use MAXITDIVN = MAXITR*N.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
      SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
     $                   LDU, C, LDC, WORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
     $                   VT( LDVT, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO
      PARAMETER          ( ZERO = 0.0D0 )
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D0 )
      DOUBLE PRECISION   NEGONE
      PARAMETER          ( NEGONE = -1.0D0 )
      DOUBLE PRECISION   HNDRTH
      PARAMETER          ( HNDRTH = 0.01D0 )
      DOUBLE PRECISION   TEN
      PARAMETER          ( TEN = 10.0D0 )
      DOUBLE PRECISION   HNDRD
      PARAMETER          ( HNDRD = 100.0D0 )
      DOUBLE PRECISION   MEIGTH
      PARAMETER          ( MEIGTH = -0.125D0 )
      INTEGER            MAXITR
      PARAMETER          ( MAXITR = 6 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LOWER, ROTATE
      INTEGER            I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M,
     $                   MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM
      DOUBLE PRECISION   ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
     $                   SN, THRESH, TOL, TOLMUL, UNFL
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           LSAME, DLAMCH
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
     $                   DSCAL, DSWAP, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, MAX, MIN, SIGN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      LOWER = LSAME( UPLO, 'L' )
      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( NCVT.LT.0 ) THEN
         INFO = -3
      ELSE IF( NRU.LT.0 ) THEN
         INFO = -4
      ELSE IF( NCC.LT.0 ) THEN
         INFO = -5
      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
         INFO = -9
      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
         INFO = -11
      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
         INFO = -13
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DBDSQR', -INFO )
         RETURN
      END IF
      IF( N.EQ.0 )
     $   RETURN
      IF( N.EQ.1 )
     $   GO TO 160
 *
 *     ROTATE is true if any singular vectors desired, false otherwise
 *
      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
 *
 *     If no singular vectors desired, use qd algorithm
 *
      IF( .NOT.ROTATE ) THEN
         CALL DLASQ1( N, D, E, WORK, INFO )
 *
 *     If INFO equals 2, dqds didn't finish, try to finish
 *
         IF( INFO .NE. 2 ) RETURN
         INFO = 0
      END IF
 *
      NM1 = N - 1
      NM12 = NM1 + NM1
      NM13 = NM12 + NM1
      IDIR = 0
 *
 *     Get machine constants
 *
      EPS = DLAMCH( 'Epsilon' )
      UNFL = DLAMCH( 'Safe minimum' )
 *
 *     If matrix lower bidiagonal, rotate to be upper bidiagonal
 *     by applying Givens rotations on the left
 *
      IF( LOWER ) THEN
         DO 10 I = 1, N - 1
            CALL DLARTG( D( I ), E( I ), CS, SN, R )
            D( I ) = R
            E( I ) = SN*D( I+1 )
            D( I+1 ) = CS*D( I+1 )
            WORK( I ) = CS
            WORK( NM1+I ) = SN
   10    CONTINUE
 *
 *        Update singular vectors if desired
 *
         IF( NRU.GT.0 )
     $      CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
     $                  LDU )
         IF( NCC.GT.0 )
     $      CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
     $                  LDC )
      END IF
 *
 *     Compute singular values to relative accuracy TOL
 *     (By setting TOL to be negative, algorithm will compute
 *     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
 *
      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
      TOL = TOLMUL*EPS
 *
 *     Compute approximate maximum, minimum singular values
 *
      SMAX = ZERO
      DO 20 I = 1, N
         SMAX = MAX( SMAX, ABS( D( I ) ) )
   20 CONTINUE
      DO 30 I = 1, N - 1
         SMAX = MAX( SMAX, ABS( E( I ) ) )
   30 CONTINUE
      SMINL = ZERO
      IF( TOL.GE.ZERO ) THEN
 *
 *        Relative accuracy desired
 *
         SMINOA = ABS( D( 1 ) )
         IF( SMINOA.EQ.ZERO )
     $      GO TO 50
         MU = SMINOA
         DO 40 I = 2, N
            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
            SMINOA = MIN( SMINOA, MU )
            IF( SMINOA.EQ.ZERO )
     $         GO TO 50
   40    CONTINUE
   50    CONTINUE
         SMINOA = SMINOA / SQRT( DBLE( N ) )
         THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) )
      ELSE
 *
 *        Absolute accuracy desired
 *
         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) )
      END IF
 *
 *     Prepare for main iteration loop for the singular values
 *     (MAXIT is the maximum number of passes through the inner
 *     loop permitted before nonconvergence signalled.)
 *
      MAXITDIVN = MAXITR*N
      ITERDIVN = 0
      ITER = -1
      OLDLL = -1
      OLDM = -1
 *
 *     M points to last element of unconverged part of matrix
 *
      M = N
 *
 *     Begin main iteration loop
 *
   60 CONTINUE
 *
 *     Check for convergence or exceeding iteration count
 *
      IF( M.LE.1 )
     $   GO TO 160
 *
      IF( ITER.GE.N ) THEN
         ITER = ITER - N
         ITERDIVN = ITERDIVN + 1
         IF( ITERDIVN.GE.MAXITDIVN )
     $      GO TO 200
      END IF
 *
 *     Find diagonal block of matrix to work on
 *
      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
     $   D( M ) = ZERO
      SMAX = ABS( D( M ) )
      SMIN = SMAX
      DO 70 LLL = 1, M - 1
         LL = M - LLL
         ABSS = ABS( D( LL ) )
         ABSE = ABS( E( LL ) )
         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
     $      D( LL ) = ZERO
         IF( ABSE.LE.THRESH )
     $      GO TO 80
         SMIN = MIN( SMIN, ABSS )
         SMAX = MAX( SMAX, ABSS, ABSE )
   70 CONTINUE
      LL = 0
      GO TO 90
   80 CONTINUE
      E( LL ) = ZERO
 *
 *     Matrix splits since E(LL) = 0
 *
      IF( LL.EQ.M-1 ) THEN
 *
 *        Convergence of bottom singular value, return to top of loop
 *
         M = M - 1
         GO TO 60
      END IF
   90 CONTINUE
      LL = LL + 1
 *
 *     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
 *
      IF( LL.EQ.M-1 ) THEN
 *
 *        2 by 2 block, handle separately
 *
         CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
     $                COSR, SINL, COSL )
         D( M-1 ) = SIGMX
         E( M-1 ) = ZERO
         D( M ) = SIGMN
 *
 *        Compute singular vectors, if desired
 *
         IF( NCVT.GT.0 )
     $      CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
     $                 SINR )
         IF( NRU.GT.0 )
     $      CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
         IF( NCC.GT.0 )
     $      CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
     $                 SINL )
         M = M - 2
         GO TO 60
      END IF
 *
 *     If working on new submatrix, choose shift direction
 *     (from larger end diagonal element towards smaller)
 *
      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
 *
 *           Chase bulge from top (big end) to bottom (small end)
 *
            IDIR = 1
         ELSE
 *
 *           Chase bulge from bottom (big end) to top (small end)
 *
            IDIR = 2
         END IF
      END IF
 *
 *     Apply convergence tests
 *
      IF( IDIR.EQ.1 ) THEN
 *
 *        Run convergence test in forward direction
 *        First apply standard test to bottom of matrix
 *
         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
            E( M-1 ) = ZERO
            GO TO 60
         END IF
 *
         IF( TOL.GE.ZERO ) THEN
 *
 *           If relative accuracy desired,
 *           apply convergence criterion forward
 *
            MU = ABS( D( LL ) )
            SMINL = MU
            DO 100 LLL = LL, M - 1
               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
                  E( LLL ) = ZERO
                  GO TO 60
               END IF
               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
               SMINL = MIN( SMINL, MU )
  100       CONTINUE
         END IF
 *
      ELSE
 *
 *        Run convergence test in backward direction
 *        First apply standard test to top of matrix
 *
         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
            E( LL ) = ZERO
            GO TO 60
         END IF
 *
         IF( TOL.GE.ZERO ) THEN
 *
 *           If relative accuracy desired,
 *           apply convergence criterion backward
 *
            MU = ABS( D( M ) )
            SMINL = MU
            DO 110 LLL = M - 1, LL, -1
               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
                  E( LLL ) = ZERO
                  GO TO 60
               END IF
               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
               SMINL = MIN( SMINL, MU )
  110       CONTINUE
         END IF
      END IF
      OLDLL = LL
      OLDM = M
 *
 *     Compute shift.  First, test if shifting would ruin relative
 *     accuracy, and if so set the shift to zero.
 *
      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
     $    MAX( EPS, HNDRTH*TOL ) ) THEN
 *
 *        Use a zero shift to avoid loss of relative accuracy
 *
         SHIFT = ZERO
      ELSE
 *
 *        Compute the shift from 2-by-2 block at end of matrix
 *
         IF( IDIR.EQ.1 ) THEN
            SLL = ABS( D( LL ) )
            CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
         ELSE
            SLL = ABS( D( M ) )
            CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
         END IF
 *
 *        Test if shift negligible, and if so set to zero
 *
         IF( SLL.GT.ZERO ) THEN
            IF( ( SHIFT / SLL )**2.LT.EPS )
     $         SHIFT = ZERO
         END IF
      END IF
 *
 *     Increment iteration count
 *
      ITER = ITER + M - LL
 *
 *     If SHIFT = 0, do simplified QR iteration
 *
      IF( SHIFT.EQ.ZERO ) THEN
         IF( IDIR.EQ.1 ) THEN
 *
 *           Chase bulge from top to bottom
 *           Save cosines and sines for later singular vector updates
 *
            CS = ONE
            OLDCS = ONE
            DO 120 I = LL, M - 1
               CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
               IF( I.GT.LL )
     $            E( I-1 ) = OLDSN*R
               CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
               WORK( I-LL+1 ) = CS
               WORK( I-LL+1+NM1 ) = SN
               WORK( I-LL+1+NM12 ) = OLDCS
               WORK( I-LL+1+NM13 ) = OLDSN
  120       CONTINUE
            H = D( M )*CS
            D( M ) = H*OLDCS
            E( M-1 ) = H*OLDSN
 *
 *           Update singular vectors
 *
            IF( NCVT.GT.0 )
     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
     $                     WORK( N ), VT( LL, 1 ), LDVT )
            IF( NRU.GT.0 )
     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
            IF( NCC.GT.0 )
     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
 *
 *           Test convergence
 *
            IF( ABS( E( M-1 ) ).LE.THRESH )
     $         E( M-1 ) = ZERO
 *
         ELSE
 *
 *           Chase bulge from bottom to top
 *           Save cosines and sines for later singular vector updates
 *
            CS = ONE
            OLDCS = ONE
            DO 130 I = M, LL + 1, -1
               CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
               IF( I.LT.M )
     $            E( I ) = OLDSN*R
               CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
               WORK( I-LL ) = CS
               WORK( I-LL+NM1 ) = -SN
               WORK( I-LL+NM12 ) = OLDCS
               WORK( I-LL+NM13 ) = -OLDSN
  130       CONTINUE
            H = D( LL )*CS
            D( LL ) = H*OLDCS
            E( LL ) = H*OLDSN
 *
 *           Update singular vectors
 *
            IF( NCVT.GT.0 )
     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
            IF( NRU.GT.0 )
     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
     $                     WORK( N ), U( 1, LL ), LDU )
            IF( NCC.GT.0 )
     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
     $                     WORK( N ), C( LL, 1 ), LDC )
 *
 *           Test convergence
 *
            IF( ABS( E( LL ) ).LE.THRESH )
     $         E( LL ) = ZERO
         END IF
      ELSE
 *
 *        Use nonzero shift
 *
         IF( IDIR.EQ.1 ) THEN
 *
 *           Chase bulge from top to bottom
 *           Save cosines and sines for later singular vector updates
 *
            F = ( ABS( D( LL ) )-SHIFT )*
     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
            G = E( LL )
            DO 140 I = LL, M - 1
               CALL DLARTG( F, G, COSR, SINR, R )
               IF( I.GT.LL )
     $            E( I-1 ) = R
               F = COSR*D( I ) + SINR*E( I )
               E( I ) = COSR*E( I ) - SINR*D( I )
               G = SINR*D( I+1 )
               D( I+1 ) = COSR*D( I+1 )
               CALL DLARTG( F, G, COSL, SINL, R )
               D( I ) = R
               F = COSL*E( I ) + SINL*D( I+1 )
               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
               IF( I.LT.M-1 ) THEN
                  G = SINL*E( I+1 )
                  E( I+1 ) = COSL*E( I+1 )
               END IF
               WORK( I-LL+1 ) = COSR
               WORK( I-LL+1+NM1 ) = SINR
               WORK( I-LL+1+NM12 ) = COSL
               WORK( I-LL+1+NM13 ) = SINL
  140       CONTINUE
            E( M-1 ) = F
 *
 *           Update singular vectors
 *
            IF( NCVT.GT.0 )
     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
     $                     WORK( N ), VT( LL, 1 ), LDVT )
            IF( NRU.GT.0 )
     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
            IF( NCC.GT.0 )
     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
 *
 *           Test convergence
 *
            IF( ABS( E( M-1 ) ).LE.THRESH )
     $         E( M-1 ) = ZERO
 *
         ELSE
 *
 *           Chase bulge from bottom to top
 *           Save cosines and sines for later singular vector updates
 *
            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
     $          D( M ) )
            G = E( M-1 )
            DO 150 I = M, LL + 1, -1
               CALL DLARTG( F, G, COSR, SINR, R )
               IF( I.LT.M )
     $            E( I ) = R
               F = COSR*D( I ) + SINR*E( I-1 )
               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
               G = SINR*D( I-1 )
               D( I-1 ) = COSR*D( I-1 )
               CALL DLARTG( F, G, COSL, SINL, R )
               D( I ) = R
               F = COSL*E( I-1 ) + SINL*D( I-1 )
               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
               IF( I.GT.LL+1 ) THEN
                  G = SINL*E( I-2 )
                  E( I-2 ) = COSL*E( I-2 )
               END IF
               WORK( I-LL ) = COSR
               WORK( I-LL+NM1 ) = -SINR
               WORK( I-LL+NM12 ) = COSL
               WORK( I-LL+NM13 ) = -SINL
  150       CONTINUE
            E( LL ) = F
 *
 *           Test convergence
 *
            IF( ABS( E( LL ) ).LE.THRESH )
     $         E( LL ) = ZERO
 *
 *           Update singular vectors if desired
 *
            IF( NCVT.GT.0 )
     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
            IF( NRU.GT.0 )
     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
     $                     WORK( N ), U( 1, LL ), LDU )
            IF( NCC.GT.0 )
     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
     $                     WORK( N ), C( LL, 1 ), LDC )
         END IF
      END IF
 *
 *     QR iteration finished, go back and check convergence
 *
      GO TO 60
 *
 *     All singular values converged, so make them positive
 *
  160 CONTINUE
      DO 170 I = 1, N
         IF( D( I ).LT.ZERO ) THEN
            D( I ) = -D( I )
 *
 *           Change sign of singular vectors, if desired
 *
            IF( NCVT.GT.0 )
     $         CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
         END IF
  170 CONTINUE
 *
 *     Sort the singular values into decreasing order (insertion sort on
 *     singular values, but only one transposition per singular vector)
 *
      DO 190 I = 1, N - 1
 *
 *        Scan for smallest D(I)
 *
         ISUB = 1
         SMIN = D( 1 )
         DO 180 J = 2, N + 1 - I
            IF( D( J ).LE.SMIN ) THEN
               ISUB = J
               SMIN = D( J )
            END IF
  180    CONTINUE
         IF( ISUB.NE.N+1-I ) THEN
 *
 *           Swap singular values and vectors
 *
            D( ISUB ) = D( N+1-I )
            D( N+1-I ) = SMIN
            IF( NCVT.GT.0 )
     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
     $                     LDVT )
            IF( NRU.GT.0 )
     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
            IF( NCC.GT.0 )
     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
         END IF
  190 CONTINUE
      GO TO 220
 *
 *     Maximum number of iterations exceeded, failure to converge
 *
  200 CONTINUE
      INFO = 0
      DO 210 I = 1, N - 1
         IF( E( I ).NE.ZERO )
     $      INFO = INFO + 1
  210 CONTINUE
  220 CONTINUE
      RETURN
 *
 *     End of DBDSQR
 *
      END
--- a/lib/linalg/dcabs1.cpp
+++ b/lib/linalg/dcabs1.cpp
@ -0,0 +1,14 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 doublereal dcabs1_(doublecomplex *z__)
 {
    doublereal ret_val, d__1, d__2;
    double d_lmp_imag(doublecomplex *);
    ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_lmp_imag(z__), abs(d__2));
    return ret_val;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dcabs1.f
+++ b/lib/linalg/dcabs1.f
@ -1,66 +0,0 @@
 *> \brief \b DCABS1
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DCABS1(Z)
 *
 *       .. Scalar Arguments ..
 *       COMPLEX*16 Z
 *       ..
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] Z
 *> \verbatim
 *>          Z is COMPLEX*16
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level1
 *
 *  =====================================================================
      DOUBLE PRECISION FUNCTION DCABS1(Z)
 *
 *  -- Reference BLAS level1 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      COMPLEX*16 Z
 *     ..
 *     ..
 *  =====================================================================
 *
 *     .. Intrinsic Functions ..
      INTRINSIC ABS,DBLE,DIMAG
 *
      DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
      RETURN
 *
 *     End of DCABS1
 *
      END
--- a/lib/linalg/dcopy.cpp
+++ b/lib/linalg/dcopy.cpp
@ -0,0 +1,56 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
 {
    integer i__1;
    integer i__, m, ix, iy, mp1;
    --dy;
    --dx;
    if (*n <= 0) {
        return 0;
    }
    if (*incx == 1 && *incy == 1) {
        m = *n % 7;
        if (m != 0) {
            i__1 = m;
            for (i__ = 1; i__ <= i__1; ++i__) {
                dy[i__] = dx[i__];
            }
            if (*n < 7) {
                return 0;
            }
        }
        mp1 = m + 1;
        i__1 = *n;
        for (i__ = mp1; i__ <= i__1; i__ += 7) {
            dy[i__] = dx[i__];
            dy[i__ + 1] = dx[i__ + 1];
            dy[i__ + 2] = dx[i__ + 2];
            dy[i__ + 3] = dx[i__ + 3];
            dy[i__ + 4] = dx[i__ + 4];
            dy[i__ + 5] = dx[i__ + 5];
            dy[i__ + 6] = dx[i__ + 6];
        }
    } else {
        ix = 1;
        iy = 1;
        if (*incx < 0) {
            ix = (-(*n) + 1) * *incx + 1;
        }
        if (*incy < 0) {
            iy = (-(*n) + 1) * *incy + 1;
        }
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            dy[iy] = dx[ix];
            ix += *incx;
            iy += *incy;
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dcopy.f
+++ b/lib/linalg/dcopy.f
@ -1,146 +0,0 @@
 *> \brief \b DCOPY
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
 *
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>    DCOPY copies a vector, x, to a vector, y.
 *>    uses unrolled loops for increments equal to 1.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         number of elements in input vector(s)
 *> \endverbatim
 *>
 *> \param[in] DX
 *> \verbatim
 *>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>         storage spacing between elements of DX
 *> \endverbatim
 *>
 *> \param[out] DY
 *> \verbatim
 *>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
 *> \endverbatim
 *>
 *> \param[in] INCY
 *> \verbatim
 *>          INCY is INTEGER
 *>         storage spacing between elements of DY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level1
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>     jack dongarra, linpack, 3/11/78.
 *>     modified 12/3/93, array(1) declarations changed to array(*)
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
 *
 *  -- Reference BLAS level1 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER INCX,INCY,N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION DX(*),DY(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      INTEGER I,IX,IY,M,MP1
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MOD
 *     ..
      IF (N.LE.0) RETURN
      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
 *
 *        code for both increments equal to 1
 *
 *
 *        clean-up loop
 *
         M = MOD(N,7)
         IF (M.NE.0) THEN
            DO I = 1,M
               DY(I) = DX(I)
            END DO
            IF (N.LT.7) RETURN
         END IF
         MP1 = M + 1
         DO I = MP1,N,7
            DY(I) = DX(I)
            DY(I+1) = DX(I+1)
            DY(I+2) = DX(I+2)
            DY(I+3) = DX(I+3)
            DY(I+4) = DX(I+4)
            DY(I+5) = DX(I+5)
            DY(I+6) = DX(I+6)
         END DO
      ELSE
 *
 *        code for unequal increments or equal increments
 *          not equal to 1
 *
         IX = 1
         IY = 1
         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
         DO I = 1,N
            DY(IY) = DX(IX)
            IX = IX + INCX
            IY = IY + INCY
         END DO
      END IF
      RETURN
 *
 *     End of DCOPY
 *
      END
--- a/lib/linalg/ddot.cpp
+++ b/lib/linalg/ddot.cpp
@ -0,0 +1,58 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
 {
    integer i__1;
    doublereal ret_val;
    integer i__, m, ix, iy, mp1;
    doublereal dtemp;
    --dy;
    --dx;
    ret_val = 0.;
    dtemp = 0.;
    if (*n <= 0) {
        return ret_val;
    }
    if (*incx == 1 && *incy == 1) {
        m = *n % 5;
        if (m != 0) {
            i__1 = m;
            for (i__ = 1; i__ <= i__1; ++i__) {
                dtemp += dx[i__] * dy[i__];
            }
            if (*n < 5) {
                ret_val = dtemp;
                return ret_val;
            }
        }
        mp1 = m + 1;
        i__1 = *n;
        for (i__ = mp1; i__ <= i__1; i__ += 5) {
            dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] +
                    dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] +
                    dx[i__ + 4] * dy[i__ + 4];
        }
    } else {
        ix = 1;
        iy = 1;
        if (*incx < 0) {
            ix = (-(*n) + 1) * *incx + 1;
        }
        if (*incy < 0) {
            iy = (-(*n) + 1) * *incy + 1;
        }
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            dtemp += dx[ix] * dy[iy];
            ix += *incx;
            iy += *incy;
        }
    }
    ret_val = dtemp;
    return ret_val;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/ddot.f
+++ b/lib/linalg/ddot.f
@ -1,148 +0,0 @@
 *> \brief \b DDOT
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
 *
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>    DDOT forms the dot product of two vectors.
 *>    uses unrolled loops for increments equal to one.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         number of elements in input vector(s)
 *> \endverbatim
 *>
 *> \param[in] DX
 *> \verbatim
 *>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>         storage spacing between elements of DX
 *> \endverbatim
 *>
 *> \param[in] DY
 *> \verbatim
 *>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
 *> \endverbatim
 *>
 *> \param[in] INCY
 *> \verbatim
 *>          INCY is INTEGER
 *>         storage spacing between elements of DY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level1
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>     jack dongarra, linpack, 3/11/78.
 *>     modified 12/3/93, array(1) declarations changed to array(*)
 *> \endverbatim
 *>
 *  =====================================================================
      DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
 *
 *  -- Reference BLAS level1 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER INCX,INCY,N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION DX(*),DY(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      DOUBLE PRECISION DTEMP
      INTEGER I,IX,IY,M,MP1
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MOD
 *     ..
      DDOT = 0.0d0
      DTEMP = 0.0d0
      IF (N.LE.0) RETURN
      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
 *
 *        code for both increments equal to 1
 *
 *
 *        clean-up loop
 *
         M = MOD(N,5)
         IF (M.NE.0) THEN
            DO I = 1,M
               DTEMP = DTEMP + DX(I)*DY(I)
            END DO
            IF (N.LT.5) THEN
               DDOT=DTEMP
            RETURN
            END IF
         END IF
         MP1 = M + 1
         DO I = MP1,N,5
          DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
     $            DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
         END DO
      ELSE
 *
 *        code for unequal increments or equal increments
 *          not equal to 1
 *
         IX = 1
         IY = 1
         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
         DO I = 1,N
            DTEMP = DTEMP + DX(IX)*DY(IY)
            IX = IX + INCX
            IY = IY + INCY
         END DO
      END IF
      DDOT = DTEMP
      RETURN
 *
 *     End of DDOT
 *
      END
--- a/lib/linalg/dgebd2.cpp
+++ b/lib/linalg/dgebd2.cpp
@ -0,0 +1,105 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 int dgebd2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
            doublereal *tauq, doublereal *taup, doublereal *work, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3;
    integer i__;
    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, ftnlen),
        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
        xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info < 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGEBD2", &i__1, (ftnlen)6);
        return 0;
    }
    if (*m >= *n) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            i__2 = *m - i__ + 1;
            i__3 = i__ + 1;
            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
                    &tauq[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            a[i__ + i__ * a_dim1] = 1.;
            if (i__ < *n) {
                i__2 = *m - i__ + 1;
                i__3 = *n - i__;
                dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[i__],
                       &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
            }
            a[i__ + i__ * a_dim1] = d__[i__];
            if (i__ < *n) {
                i__2 = *n - i__;
                i__3 = i__ + 2;
                dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
                        &taup[i__]);
                e[i__] = a[i__ + (i__ + 1) * a_dim1];
                a[i__ + (i__ + 1) * a_dim1] = 1.;
                i__2 = *m - i__;
                i__3 = *n - i__;
                dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__],
                       &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)5);
                a[i__ + (i__ + 1) * a_dim1] = e[i__];
            } else {
                taup[i__] = 0.;
            }
        }
    } else {
        i__1 = *m;
        for (i__ = 1; i__ <= i__1; ++i__) {
            i__2 = *n - i__ + 1;
            i__3 = i__ + 1;
            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
                    &taup[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            a[i__ + i__ * a_dim1] = 1.;
            if (i__ < *m) {
                i__2 = *m - i__;
                i__3 = *n - i__ + 1;
                dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[i__],
                       &a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
            }
            a[i__ + i__ * a_dim1] = d__[i__];
            if (i__ < *m) {
                i__2 = *m - i__;
                i__3 = i__ + 2;
                dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
                        &tauq[i__]);
                e[i__] = a[i__ + 1 + i__ * a_dim1];
                a[i__ + 1 + i__ * a_dim1] = 1.;
                i__2 = *m - i__;
                i__3 = *n - i__;
                dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tauq[i__],
                       &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
                a[i__ + 1 + i__ * a_dim1] = e[i__];
            } else {
                tauq[i__] = 0.;
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgebd2.f
+++ b/lib/linalg/dgebd2.f
@ -1,317 +0,0 @@
 *> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGEBD2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEBD2 reduces a real general m by n matrix A to upper or lower
 *> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
 *>
 *> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows in the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns in the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the m by n general matrix to be reduced.
 *>          On exit,
 *>          if m >= n, the diagonal and the first superdiagonal are
 *>            overwritten with the upper bidiagonal matrix B; the
 *>            elements below the diagonal, with the array TAUQ, represent
 *>            the orthogonal matrix Q as a product of elementary
 *>            reflectors, and the elements above the first superdiagonal,
 *>            with the array TAUP, represent the orthogonal matrix P as
 *>            a product of elementary reflectors;
 *>          if m < n, the diagonal and the first subdiagonal are
 *>            overwritten with the lower bidiagonal matrix B; the
 *>            elements below the first subdiagonal, with the array TAUQ,
 *>            represent the orthogonal matrix Q as a product of
 *>            elementary reflectors, and the elements above the diagonal,
 *>            with the array TAUP, represent the orthogonal matrix P as
 *>            a product of elementary reflectors.
 *>          See Further Details.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The diagonal elements of the bidiagonal matrix B:
 *>          D(i) = A(i,i).
 *> \endverbatim
 *>
 *> \param[out] E
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (min(M,N)-1)
 *>          The off-diagonal elements of the bidiagonal matrix B:
 *>          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
 *>          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
 *> \endverbatim
 *>
 *> \param[out] TAUQ
 *> \verbatim
 *>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] TAUP
 *> \verbatim
 *>          TAUP is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix P. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (max(M,N))
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0: successful exit.
 *>          < 0: if INFO = -i, the i-th argument had an illegal value.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrices Q and P are represented as products of elementary
 *>  reflectors:
 *>
 *>  If m >= n,
 *>
 *>     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
 *>
 *>  Each H(i) and G(i) has the form:
 *>
 *>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
 *>
 *>  where tauq and taup are real scalars, and v and u are real vectors;
 *>  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
 *>  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
 *>  tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  If m < n,
 *>
 *>     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
 *>
 *>  Each H(i) and G(i) has the form:
 *>
 *>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
 *>
 *>  where tauq and taup are real scalars, and v and u are real vectors;
 *>  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
 *>  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
 *>  tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  The contents of A on exit are illustrated by the following examples:
 *>
 *>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
 *>
 *>    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
 *>    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
 *>    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
 *>    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
 *>    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
 *>    (  v1  v2  v3  v4  v5 )
 *>
 *>  where d and e denote diagonal and off-diagonal elements of B, vi
 *>  denotes an element of the vector defining H(i), and ui an element of
 *>  the vector defining G(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
     $                   TAUQ( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLARF, DLARFG, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.LT.0 ) THEN
         CALL XERBLA( 'DGEBD2', -INFO )
         RETURN
      END IF
 *
      IF( M.GE.N ) THEN
 *
 *        Reduce to upper bidiagonal form
 *
         DO 10 I = 1, N
 *
 *           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
 *
            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
     $                   TAUQ( I ) )
            D( I ) = A( I, I )
            A( I, I ) = ONE
 *
 *           Apply H(i) to A(i:m,i+1:n) from the left
 *
            IF( I.LT.N )
     $         CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
     $                     A( I, I+1 ), LDA, WORK )
            A( I, I ) = D( I )
 *
            IF( I.LT.N ) THEN
 *
 *              Generate elementary reflector G(i) to annihilate
 *              A(i,i+2:n)
 *
               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
     $                      LDA, TAUP( I ) )
               E( I ) = A( I, I+1 )
               A( I, I+1 ) = ONE
 *
 *              Apply G(i) to A(i+1:m,i+1:n) from the right
 *
               CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
               A( I, I+1 ) = E( I )
            ELSE
               TAUP( I ) = ZERO
            END IF
   10    CONTINUE
      ELSE
 *
 *        Reduce to lower bidiagonal form
 *
         DO 20 I = 1, M
 *
 *           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
 *
            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
     $                   TAUP( I ) )
            D( I ) = A( I, I )
            A( I, I ) = ONE
 *
 *           Apply G(i) to A(i+1:m,i:n) from the right
 *
            IF( I.LT.M )
     $         CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
     $                     TAUP( I ), A( I+1, I ), LDA, WORK )
            A( I, I ) = D( I )
 *
            IF( I.LT.M ) THEN
 *
 *              Generate elementary reflector H(i) to annihilate
 *              A(i+2:m,i)
 *
               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
     $                      TAUQ( I ) )
               E( I ) = A( I+1, I )
               A( I+1, I ) = ONE
 *
 *              Apply H(i) to A(i+1:m,i+1:n) from the left
 *
               CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
     $                     A( I+1, I+1 ), LDA, WORK )
               A( I+1, I ) = E( I )
            ELSE
               TAUQ( I ) = ZERO
            END IF
   20    CONTINUE
      END IF
      RETURN
 *
 *     End of DGEBD2
 *
      END
--- a/lib/linalg/dgebrd.cpp
+++ b/lib/linalg/dgebrd.cpp
@ -0,0 +1,129 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static doublereal c_b21 = -1.;
 static doublereal c_b22 = 1.;
 int dgebrd_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
            doublereal *tauq, doublereal *taup, doublereal *work, integer *lwork, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    integer i__, j, nb, nx, ws;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen);
    integer nbmin, iinfo, minmn;
    extern int dgebd2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       doublereal *, doublereal *, doublereal *, integer *),
        dlabrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *,
                doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *,
                integer *),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    integer ldwrkx, ldwrky, lwkopt;
    logical lquery;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;
    *info = 0;
    i__1 = 1, i__2 = ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
    nb = max(i__1, i__2);
    lwkopt = (*m + *n) * nb;
    work[1] = (doublereal)lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    } else {
        i__1 = max(1, *m);
        if (*lwork < max(i__1, *n) && !lquery) {
            *info = -10;
        }
    }
    if (*info < 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGEBRD", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        return 0;
    }
    minmn = min(*m, *n);
    if (minmn == 0) {
        work[1] = 1.;
        return 0;
    }
    ws = max(*m, *n);
    ldwrkx = *m;
    ldwrky = *n;
    if (nb > 1 && nb < minmn) {
        i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
        nx = max(i__1, i__2);
        if (nx < minmn) {
            ws = (*m + *n) * nb;
            if (*lwork < ws) {
                nbmin = ilaenv_(&c__2, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
                if (*lwork >= (*m + *n) * nbmin) {
                    nb = *lwork / (*m + *n);
                } else {
                    nb = 1;
                    nx = minmn;
                }
            }
        }
    } else {
        nx = minmn;
    }
    i__1 = minmn - nx;
    i__2 = nb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
        i__3 = *m - i__ + 1;
        i__4 = *n - i__ + 1;
        dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__],
                &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky);
        i__3 = *m - i__ - nb + 1;
        i__4 = *n - i__ - nb + 1;
        dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + nb + i__ * a_dim1],
               lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &c_b22,
               &a[i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)12, (ftnlen)9);
        i__3 = *m - i__ - nb + 1;
        i__4 = *n - i__ - nb + 1;
        dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &nb, &c_b21, &work[nb + 1], &ldwrkx,
               &a[i__ + (i__ + nb) * a_dim1], lda, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda,
               (ftnlen)12, (ftnlen)12);
        if (*m >= *n) {
            i__3 = i__ + nb - 1;
            for (j = i__; j <= i__3; ++j) {
                a[j + j * a_dim1] = d__[j];
                a[j + (j + 1) * a_dim1] = e[j];
            }
        } else {
            i__3 = i__ + nb - 1;
            for (j = i__; j <= i__3; ++j) {
                a[j + j * a_dim1] = d__[j];
                a[j + 1 + j * a_dim1] = e[j];
            }
        }
    }
    i__2 = *m - i__ + 1;
    i__1 = *n - i__ + 1;
    dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__], &taup[i__],
            &work[1], &iinfo);
    work[1] = (doublereal)ws;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgebrd.f
+++ b/lib/linalg/dgebrd.f
@ -1,349 +0,0 @@
 *> \brief \b DGEBRD
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGEBRD + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
 *                          INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEBRD reduces a general real M-by-N matrix A to upper or lower
 *> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
 *>
 *> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows in the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns in the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N general matrix to be reduced.
 *>          On exit,
 *>          if m >= n, the diagonal and the first superdiagonal are
 *>            overwritten with the upper bidiagonal matrix B; the
 *>            elements below the diagonal, with the array TAUQ, represent
 *>            the orthogonal matrix Q as a product of elementary
 *>            reflectors, and the elements above the first superdiagonal,
 *>            with the array TAUP, represent the orthogonal matrix P as
 *>            a product of elementary reflectors;
 *>          if m < n, the diagonal and the first subdiagonal are
 *>            overwritten with the lower bidiagonal matrix B; the
 *>            elements below the first subdiagonal, with the array TAUQ,
 *>            represent the orthogonal matrix Q as a product of
 *>            elementary reflectors, and the elements above the diagonal,
 *>            with the array TAUP, represent the orthogonal matrix P as
 *>            a product of elementary reflectors.
 *>          See Further Details.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The diagonal elements of the bidiagonal matrix B:
 *>          D(i) = A(i,i).
 *> \endverbatim
 *>
 *> \param[out] E
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (min(M,N)-1)
 *>          The off-diagonal elements of the bidiagonal matrix B:
 *>          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
 *>          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
 *> \endverbatim
 *>
 *> \param[out] TAUQ
 *> \verbatim
 *>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] TAUP
 *> \verbatim
 *>          TAUP is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix P. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The length of the array WORK.  LWORK >= max(1,M,N).
 *>          For optimum performance LWORK >= (M+N)*NB, where NB
 *>          is the optimal blocksize.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrices Q and P are represented as products of elementary
 *>  reflectors:
 *>
 *>  If m >= n,
 *>
 *>     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
 *>
 *>  Each H(i) and G(i) has the form:
 *>
 *>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
 *>
 *>  where tauq and taup are real scalars, and v and u are real vectors;
 *>  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
 *>  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
 *>  tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  If m < n,
 *>
 *>     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
 *>
 *>  Each H(i) and G(i) has the form:
 *>
 *>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
 *>
 *>  where tauq and taup are real scalars, and v and u are real vectors;
 *>  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
 *>  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
 *>  tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  The contents of A on exit are illustrated by the following examples:
 *>
 *>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
 *>
 *>    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
 *>    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
 *>    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
 *>    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
 *>    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
 *>    (  v1  v2  v3  v4  v5 )
 *>
 *>  where d and e denote diagonal and off-diagonal elements of B, vi
 *>  denotes an element of the vector defining H(i), and ui an element of
 *>  the vector defining G(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
     $                   INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
     $                   TAUQ( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
     $                   NBMIN, NX, WS
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEBD2, DGEMM, DLABRD, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX, MIN
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      INFO = 0
      NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
      LWKOPT = ( M+N )*NB
      WORK( 1 ) = DBLE( LWKOPT )
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
         INFO = -10
      END IF
      IF( INFO.LT.0 ) THEN
         CALL XERBLA( 'DGEBRD', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      MINMN = MIN( M, N )
      IF( MINMN.EQ.0 ) THEN
         WORK( 1 ) = 1
         RETURN
      END IF
 *
      WS = MAX( M, N )
      LDWRKX = M
      LDWRKY = N
 *
      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
 *
 *        Set the crossover point NX.
 *
         NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
 *
 *        Determine when to switch from blocked to unblocked code.
 *
         IF( NX.LT.MINMN ) THEN
            WS = ( M+N )*NB
            IF( LWORK.LT.WS ) THEN
 *
 *              Not enough work space for the optimal NB, consider using
 *              a smaller block size.
 *
               NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
               IF( LWORK.GE.( M+N )*NBMIN ) THEN
                  NB = LWORK / ( M+N )
               ELSE
                  NB = 1
                  NX = MINMN
               END IF
            END IF
         END IF
      ELSE
         NX = MINMN
      END IF
 *
      DO 30 I = 1, MINMN - NX, NB
 *
 *        Reduce rows and columns i:i+nb-1 to bidiagonal form and return
 *        the matrices X and Y which are needed to update the unreduced
 *        part of the matrix
 *
         CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
     $                WORK( LDWRKX*NB+1 ), LDWRKY )
 *
 *        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
 *        of the form  A := A - V*Y**T - X*U**T
 *
         CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
     $               NB, -ONE, A( I+NB, I ), LDA,
     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
     $               A( I+NB, I+NB ), LDA )
         CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
     $               ONE, A( I+NB, I+NB ), LDA )
 *
 *        Copy diagonal and off-diagonal elements of B back into A
 *
         IF( M.GE.N ) THEN
            DO 10 J = I, I + NB - 1
               A( J, J ) = D( J )
               A( J, J+1 ) = E( J )
   10       CONTINUE
         ELSE
            DO 20 J = I, I + NB - 1
               A( J, J ) = D( J )
               A( J+1, J ) = E( J )
   20       CONTINUE
         END IF
   30 CONTINUE
 *
 *     Use unblocked code to reduce the remainder of the matrix
 *
      CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
      WORK( 1 ) = WS
      RETURN
 *
 *     End of DGEBRD
 *
      END
--- a/lib/linalg/dgecon.cpp
+++ b/lib/linalg/dgecon.cpp
@ -0,0 +1,101 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 int dgecon_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *anorm,
            doublereal *rcond, doublereal *work, integer *iwork, integer *info, ftnlen norm_len)
 {
    integer a_dim1, a_offset, i__1;
    doublereal d__1;
    doublereal sl;
    integer ix;
    doublereal su;
    integer kase, kase1;
    doublereal scale;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    integer isave[3];
    extern int drscl_(integer *, doublereal *, doublereal *, integer *),
        dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
                integer *);
    extern doublereal dlamch_(char *, ftnlen);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern int xerbla_(char *, integer *, ftnlen);
    doublereal ainvnm;
    extern int dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *,
                       doublereal *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
                       ftnlen);
    logical onenrm;
    char normin[1];
    doublereal smlnum;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --work;
    --iwork;
    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1);
    if (!onenrm && !lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *n)) {
        *info = -4;
    } else if (*anorm < 0.) {
        *info = -5;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGECON", &i__1, (ftnlen)6);
        return 0;
    }
    *rcond = 0.;
    if (*n == 0) {
        *rcond = 1.;
        return 0;
    } else if (*anorm == 0.) {
        return 0;
    }
    smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12);
    ainvnm = 0.;
    *(unsigned char *)normin = 'N';
    if (onenrm) {
        kase1 = 1;
    } else {
        kase1 = 2;
    }
    kase = 0;
 L10:
    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
    if (kase != 0) {
        if (kase == kase1) {
            dlatrs_((char *)"Lower", (char *)"No transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
                    &work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)4, (ftnlen)1);
            dlatrs_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1],
                    &su, &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)8, (ftnlen)1);
        } else {
            dlatrs_((char *)"Upper", (char *)"Transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1], &su,
                    &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)8, (ftnlen)1);
            dlatrs_((char *)"Lower", (char *)"Transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
                    &work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)4, (ftnlen)1);
        }
        scale = sl * su;
        *(unsigned char *)normin = 'Y';
        if (scale != 1.) {
            ix = idamax_(n, &work[1], &c__1);
            if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) {
                goto L20;
            }
            drscl_(n, &scale, &work[1], &c__1);
        }
        goto L10;
    }
    if (ainvnm != 0.) {
        *rcond = 1. / ainvnm / *anorm;
    }
 L20:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgecon.f
+++ b/lib/linalg/dgecon.f
@ -1,258 +0,0 @@
 *> \brief \b DGECON
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGECON + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
 *                          INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM
 *       INTEGER            INFO, LDA, N
 *       DOUBLE PRECISION   ANORM, RCOND
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGECON estimates the reciprocal of the condition number of a general
 *> real matrix A, in either the 1-norm or the infinity-norm, using
 *> the LU factorization computed by DGETRF.
 *>
 *> An estimate is obtained for norm(inv(A)), and the reciprocal of the
 *> condition number is computed as
 *>    RCOND = 1 / ( norm(A) * norm(inv(A)) ).
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] NORM
 *> \verbatim
 *>          NORM is CHARACTER*1
 *>          Specifies whether the 1-norm condition number or the
 *>          infinity-norm condition number is required:
 *>          = '1' or 'O':  1-norm;
 *>          = 'I':         Infinity-norm.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The factors L and U from the factorization A = P*L*U
 *>          as computed by DGETRF.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in] ANORM
 *> \verbatim
 *>          ANORM is DOUBLE PRECISION
 *>          If NORM = '1' or 'O', the 1-norm of the original matrix A.
 *>          If NORM = 'I', the infinity-norm of the original matrix A.
 *> \endverbatim
 *>
 *> \param[out] RCOND
 *> \verbatim
 *>          RCOND is DOUBLE PRECISION
 *>          The reciprocal of the condition number of the matrix A,
 *>          computed as RCOND = 1/(norm(A) * norm(inv(A))).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (4*N)
 *> \endverbatim
 *>
 *> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (N)
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
     $                   INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          NORM
      INTEGER            INFO, LDA, N
      DOUBLE PRECISION   ANORM, RCOND
 *     ..
 *     .. Array Arguments ..
      INTEGER            IWORK( * )
      DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            ONENRM
      CHARACTER          NORMIN
      INTEGER            IX, KASE, KASE1
      DOUBLE PRECISION   AINVNM, SCALE, SL, SMLNUM, SU
 *     ..
 *     .. Local Arrays ..
      INTEGER            ISAVE( 3 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            IDAMAX
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           LSAME, IDAMAX, DLAMCH
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLACN2, DLATRS, DRSCL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -4
      ELSE IF( ANORM.LT.ZERO ) THEN
         INFO = -5
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGECON', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      RCOND = ZERO
      IF( N.EQ.0 ) THEN
         RCOND = ONE
         RETURN
      ELSE IF( ANORM.EQ.ZERO ) THEN
         RETURN
      END IF
 *
      SMLNUM = DLAMCH( 'Safe minimum' )
 *
 *     Estimate the norm of inv(A).
 *
      AINVNM = ZERO
      NORMIN = 'N'
      IF( ONENRM ) THEN
         KASE1 = 1
      ELSE
         KASE1 = 2
      END IF
      KASE = 0
   10 CONTINUE
      CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE )
      IF( KASE.NE.0 ) THEN
         IF( KASE.EQ.KASE1 ) THEN
 *
 *           Multiply by inv(L).
 *
            CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
 *
 *           Multiply by inv(U).
 *
            CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
     $                   A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
         ELSE
 *
 *           Multiply by inv(U**T).
 *
            CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
     $                   LDA, WORK, SU, WORK( 3*N+1 ), INFO )
 *
 *           Multiply by inv(L**T).
 *
            CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
         END IF
 *
 *        Divide X by 1/(SL*SU) if doing so will not cause overflow.
 *
         SCALE = SL*SU
         NORMIN = 'Y'
         IF( SCALE.NE.ONE ) THEN
            IX = IDAMAX( N, WORK, 1 )
            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
     $         GO TO 20
            CALL DRSCL( N, SCALE, WORK, 1 )
         END IF
         GO TO 10
      END IF
 *
 *     Compute the estimate of the reciprocal condition number.
 *
      IF( AINVNM.NE.ZERO )
     $   RCOND = ( ONE / AINVNM ) / ANORM
 *
   20 CONTINUE
      RETURN
 *
 *     End of DGECON
 *
      END
--- a/lib/linalg/dgelq2.cpp
+++ b/lib/linalg/dgelq2.cpp
@ -0,0 +1,53 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dgelq2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
            integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3;
    integer i__, k;
    doublereal aii;
    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, ftnlen),
        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
        xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGELQ2", &i__1, (ftnlen)6);
        return 0;
    }
    k = min(*m, *n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = *n - i__ + 1;
        i__3 = i__ + 1;
        dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda, &tau[i__]);
        if (i__ < *m) {
            aii = a[i__ + i__ * a_dim1];
            a[i__ + i__ * a_dim1] = 1.;
            i__2 = *m - i__;
            i__3 = *n - i__ + 1;
            dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__],
                   &a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
            a[i__ + i__ * a_dim1] = aii;
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgelq2.f
+++ b/lib/linalg/dgelq2.f
@ -1,197 +0,0 @@
 *> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGELQ2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGELQ2 computes an LQ factorization of a real m-by-n matrix A:
 *>
 *>    A = ( L 0 ) *  Q
 *>
 *> where:
 *>
 *>    Q is a n-by-n orthogonal matrix;
 *>    L is a lower-triangular m-by-m matrix;
 *>    0 is a m-by-(n-m) zero matrix, if m < n.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the m by n matrix A.
 *>          On exit, the elements on and below the diagonal of the array
 *>          contain the m by min(m,n) lower trapezoidal matrix L (L is
 *>          lower triangular if m <= n); the elements above the diagonal,
 *>          with the array TAU, represent the orthogonal matrix Q as a
 *>          product of elementary reflectors (see Further Details).
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors (see Further
 *>          Details).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (M)
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0: successful exit
 *>          < 0: if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrix Q is represented as a product of elementary reflectors
 *>
 *>     Q = H(k) . . . H(2) H(1), where k = min(m,n).
 *>
 *>  Each H(i) has the form
 *>
 *>     H(i) = I - tau * v * v**T
 *>
 *>  where tau is a real scalar, and v is a real vector with
 *>  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
 *>  and tau in TAU(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, K
      DOUBLE PRECISION   AII
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLARF, DLARFG, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGELQ2', -INFO )
         RETURN
      END IF
 *
      K = MIN( M, N )
 *
      DO 10 I = 1, K
 *
 *        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
 *
         CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
     $                TAU( I ) )
         IF( I.LT.M ) THEN
 *
 *           Apply H(i) to A(i+1:m,i:n) from the right
 *
            AII = A( I, I )
            A( I, I ) = ONE
            CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
     $                  A( I+1, I ), LDA, WORK )
            A( I, I ) = AII
         END IF
   10 CONTINUE
      RETURN
 *
 *     End of DGELQ2
 *
      END
--- a/lib/linalg/dgelqf.cpp
+++ b/lib/linalg/dgelqf.cpp
@ -0,0 +1,106 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 int dgelqf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
            integer *lwork, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
    extern int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       integer *),
        dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
                integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
                integer *, ftnlen, ftnlen, ftnlen, ftnlen),
        dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
                doublereal *, integer *, ftnlen, ftnlen),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    *info = 0;
    nb = ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
    lwkopt = *m * nb;
    work[1] = (doublereal)lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    } else if (*lwork < max(1, *m) && !lquery) {
        *info = -7;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGELQF", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        return 0;
    }
    k = min(*m, *n);
    if (k == 0) {
        work[1] = 1.;
        return 0;
    }
    nbmin = 2;
    nx = 0;
    iws = *m;
    if (nb > 1 && nb < k) {
        i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
        nx = max(i__1, i__2);
        if (nx < k) {
            ldwork = *m;
            iws = ldwork * nb;
            if (*lwork < iws) {
                nb = *lwork / ldwork;
                i__1 = 2,
                i__2 = ilaenv_(&c__2, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
                nbmin = max(i__1, i__2);
            }
        }
    }
    if (nb >= nbmin && nb < k && nx < k) {
        i__1 = k - nx;
        i__2 = nb;
        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
            i__3 = k - i__ + 1;
            ib = min(i__3, nb);
            i__3 = *n - i__ + 1;
            dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
            if (i__ + ib <= *m) {
                i__3 = *n - i__ + 1;
                dlarft_((char *)"Forward", (char *)"Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
                        &work[1], &ldwork, (ftnlen)7, (ftnlen)7);
                i__3 = *m - i__ - ib + 1;
                i__4 = *n - i__ + 1;
                dlarfb_((char *)"Right", (char *)"No transpose", (char *)"Forward", (char *)"Rowwise", &i__3, &i__4, &ib,
                        &a[i__ + i__ * a_dim1], lda, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1],
                        lda, &work[ib + 1], &ldwork, (ftnlen)5, (ftnlen)12, (ftnlen)7, (ftnlen)7);
            }
        }
    } else {
        i__ = 1;
    }
    if (i__ <= k) {
        i__2 = *m - i__ + 1;
        i__1 = *n - i__ + 1;
        dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
    }
    work[1] = (doublereal)iws;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgelqf.f
+++ b/lib/linalg/dgelqf.f
@ -1,274 +0,0 @@
 *> \brief \b DGELQF
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGELQF + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGELQF computes an LQ factorization of a real M-by-N matrix A:
 *>
 *>    A = ( L 0 ) *  Q
 *>
 *> where:
 *>
 *>    Q is a N-by-N orthogonal matrix;
 *>    L is a lower-triangular M-by-M matrix;
 *>    0 is a M-by-(N-M) zero matrix, if M < N.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix A.
 *>          On exit, the elements on and below the diagonal of the array
 *>          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
 *>          lower triangular if m <= n); the elements above the diagonal,
 *>          with the array TAU, represent the orthogonal matrix Q as a
 *>          product of elementary reflectors (see Further Details).
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors (see Further
 *>          Details).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK.  LWORK >= max(1,M).
 *>          For optimum performance LWORK >= M*NB, where NB is the
 *>          optimal blocksize.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrix Q is represented as a product of elementary reflectors
 *>
 *>     Q = H(k) . . . H(2) H(1), where k = min(m,n).
 *>
 *>  Each H(i) has the form
 *>
 *>     H(i) = I - tau * v * v**T
 *>
 *>  where tau is a real scalar, and v is a real vector with
 *>  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
 *>  and tau in TAU(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
     $                   NBMIN, NX
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGELQ2, DLARFB, DLARFT, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
      INFO = 0
      NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
      LWKOPT = M*NB
      WORK( 1 ) = LWKOPT
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
         INFO = -7
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGELQF', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      K = MIN( M, N )
      IF( K.EQ.0 ) THEN
         WORK( 1 ) = 1
         RETURN
      END IF
 *
      NBMIN = 2
      NX = 0
      IWS = M
      IF( NB.GT.1 .AND. NB.LT.K ) THEN
 *
 *        Determine when to cross over from blocked to unblocked code.
 *
         NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
         IF( NX.LT.K ) THEN
 *
 *           Determine if workspace is large enough for blocked code.
 *
            LDWORK = M
            IWS = LDWORK*NB
            IF( LWORK.LT.IWS ) THEN
 *
 *              Not enough workspace to use optimal NB:  reduce NB and
 *              determine the minimum value of NB.
 *
               NB = LWORK / LDWORK
               NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
     $                 -1 ) )
            END IF
         END IF
      END IF
 *
      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
 *
 *        Use blocked code initially
 *
         DO 10 I = 1, K - NX, NB
            IB = MIN( K-I+1, NB )
 *
 *           Compute the LQ factorization of the current block
 *           A(i:i+ib-1,i:n)
 *
            CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
     $                   IINFO )
            IF( I+IB.LE.M ) THEN
 *
 *              Form the triangular factor of the block reflector
 *              H = H(i) H(i+1) . . . H(i+ib-1)
 *
               CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
     $                      LDA, TAU( I ), WORK, LDWORK )
 *
 *              Apply H to A(i+ib:m,i:n) from the right
 *
               CALL DLARFB( 'Right', 'No transpose', 'Forward',
     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
     $                      WORK( IB+1 ), LDWORK )
            END IF
   10    CONTINUE
      ELSE
         I = 1
      END IF
 *
 *     Use unblocked code to factor the last or only block.
 *
      IF( I.LE.K )
     $   CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
     $                IINFO )
 *
      WORK( 1 ) = IWS
      RETURN
 *
 *     End of DGELQF
 *
      END
--- a/lib/linalg/dgelsd.cpp
+++ b/lib/linalg/dgelsd.cpp
@ -0,0 +1,341 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__6 = 6;
 static integer c_n1 = -1;
 static integer c__9 = 9;
 static integer c__0 = 0;
 static integer c__1 = 1;
 static doublereal c_b82 = 0.;
 int dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
            integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
            integer *lwork, integer *iwork, integer *info)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    double log(doublereal);
    integer ie, il, mm;
    doublereal eps, anrm, bnrm;
    integer itau, nlvl, iascl, ibscl;
    doublereal sfmin;
    integer minmn, maxmn, itaup, itauq, mnthr, nwork;
    extern int dlabad_(doublereal *, doublereal *),
        dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                doublereal *, doublereal *, doublereal *, integer *, integer *);
    extern doublereal dlamch_(char *, ftnlen),
        dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
    extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       integer *, integer *),
        dlalsd_(char *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *,
                integer *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen),
        dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
                doublereal *, integer *, integer *, ftnlen),
        dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                integer *, integer *),
        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
                ftnlen),
        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
                ftnlen),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    doublereal bignum;
    extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
                       integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
                       integer *, ftnlen, ftnlen, ftnlen);
    integer wlalsd;
    extern int dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
                       ftnlen, ftnlen);
    integer ldwork;
    extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
                       ftnlen, ftnlen);
    integer liwork, minwrk, maxwrk;
    doublereal smlnum;
    logical lquery;
    integer smlsiz;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --s;
    --work;
    --iwork;
    *info = 0;
    minmn = min(*m, *n);
    maxmn = max(*m, *n);
    mnthr = ilaenv_(&c__6, (char *)"DGELSD", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*nrhs < 0) {
        *info = -3;
    } else if (*lda < max(1, *m)) {
        *info = -5;
    } else if (*ldb < max(1, maxmn)) {
        *info = -7;
    }
    smlsiz = ilaenv_(&c__9, (char *)"DGELSD", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
    minwrk = 1;
    liwork = 1;
    minmn = max(1, minmn);
    i__1 = (integer)(log((doublereal)minmn / (doublereal)(smlsiz + 1)) / log(2.)) + 1;
    nlvl = max(i__1, 0);
    if (*info == 0) {
        maxwrk = 0;
        liwork = minmn * 3 * nlvl + minmn * 11;
        mm = *m;
        if (*m >= *n && *m >= mnthr) {
            mm = *n;
            i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1,
                                                    (ftnlen)6, (ftnlen)1);
            maxwrk = max(i__1, i__2);
            i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, (char *)"DORMQR", (char *)"LT", m, nrhs, n, &c_n1,
                                                       (ftnlen)6, (ftnlen)2);
            maxwrk = max(i__1, i__2);
        }
        if (*m >= *n) {
            i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", &mm, n, &c_n1,
                                                               &c_n1, (ftnlen)6, (ftnlen)1);
            maxwrk = max(i__1, i__2);
            i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", &mm, nrhs, n,
                                                           &c_n1, (ftnlen)6, (ftnlen)3);
            maxwrk = max(i__1, i__2);
            i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, n,
                                                              &c_n1, (ftnlen)6, (ftnlen)3);
            maxwrk = max(i__1, i__2);
            i__1 = smlsiz + 1;
            wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *nrhs + i__1 * i__1;
            i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
            maxwrk = max(i__1, i__2);
            i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2),
            i__2 = *n * 3 + wlalsd;
            minwrk = max(i__1, i__2);
        }
        if (*n > *m) {
            i__1 = smlsiz + 1;
            wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *nrhs + i__1 * i__1;
            if (*n >= mnthr) {
                maxwrk = *m + *m * ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6,
                                           (ftnlen)1);
                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
                                      (*m << 1) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, m, &c_n1, &c_n1,
                                                          (ftnlen)6, (ftnlen)1);
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
                                      *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, m, &c_n1,
                                                      (ftnlen)6, (ftnlen)3);
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
                                      (*m - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", m, nrhs, m, &c_n1,
                                                         (ftnlen)6, (ftnlen)3);
                maxwrk = max(i__1, i__2);
                if (*nrhs > 1) {
                    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
                    maxwrk = max(i__1, i__2);
                } else {
                    i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
                    maxwrk = max(i__1, i__2);
                }
                i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, (char *)"DORMLQ", (char *)"LT", n, nrhs, m, &c_n1,
                                                           (ftnlen)6, (ftnlen)2);
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
                maxwrk = max(i__1, i__2);
                i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
                i__4 = *n - *m * 3;
                i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3, i__4);
                maxwrk = max(i__1, i__2);
            } else {
                maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1,
                                                      (ftnlen)6, (ftnlen)1);
                i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, n,
                                                               &c_n1, (ftnlen)6, (ftnlen)3);
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, m,
                                                            &c_n1, (ftnlen)6, (ftnlen)3);
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
                maxwrk = max(i__1, i__2);
            }
            i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1, i__2),
            i__2 = *m * 3 + wlalsd;
            minwrk = max(i__1, i__2);
        }
        minwrk = min(minwrk, maxwrk);
        work[1] = (doublereal)maxwrk;
        iwork[1] = liwork;
        if (*lwork < minwrk && !lquery) {
            *info = -12;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGELSD", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        goto L10;
    }
    if (*m == 0 || *n == 0) {
        *rank = 0;
        return 0;
    }
    eps = dlamch_((char *)"P", (ftnlen)1);
    sfmin = dlamch_((char *)"S", (ftnlen)1);
    smlnum = sfmin / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
        iascl = 1;
    } else if (anrm > bignum) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
        iascl = 2;
    } else if (anrm == 0.) {
        i__1 = max(*m, *n);
        dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb, (ftnlen)1);
        dlaset_((char *)"F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1, (ftnlen)1);
        *rank = 0;
        goto L10;
    }
    bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {
        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        ibscl = 1;
    } else if (bnrm > bignum) {
        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        ibscl = 2;
    }
    if (*m < *n) {
        i__1 = *n - *m;
        dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
    }
    if (*m >= *n) {
        mm = *m;
        if (*m >= mnthr) {
            mm = *n;
            itau = 1;
            nwork = itau + *n;
            i__1 = *lwork - nwork + 1;
            dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
            i__1 = *lwork - nwork + 1;
            dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
            if (*n > 1) {
                i__1 = *n - 1;
                i__2 = *n - 1;
                dlaset_((char *)"L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], lda, (ftnlen)1);
            }
        }
        ie = 1;
        itauq = ie + *n;
        itaup = itauq + *n;
        nwork = itaup + *n;
        i__1 = *lwork - nwork + 1;
        dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
                &work[nwork], &i__1, info);
        i__1 = *lwork - nwork + 1;
        dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
                &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
        dlalsd_((char *)"U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
                &work[nwork], &iwork[1], info, (ftnlen)1);
        if (*info != 0) {
            goto L10;
        }
        i__1 = *lwork - nwork + 1;
        dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
                &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
    } else {
        i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1, i__2), i__1 = max(i__1, *nrhs),
        i__2 = *n - *m * 3, i__1 = max(i__1, i__2);
        if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1, wlalsd)) {
            ldwork = *m;
            i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
            i__4 = *n - *m * 3;
            i__1 = (*m << 2) + *m * *lda + max(i__3, i__4), i__2 = *m * *lda + *m + *m * *nrhs,
            i__1 = max(i__1, i__2), i__2 = (*m << 2) + *m * *lda + wlalsd;
            if (*lwork >= max(i__1, i__2)) {
                ldwork = *lda;
            }
            itau = 1;
            nwork = *m + 1;
            i__1 = *lwork - nwork + 1;
            dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
            il = nwork;
            dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
            i__1 = *m - 1;
            i__2 = *m - 1;
            dlaset_((char *)"U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &ldwork, (ftnlen)1);
            ie = il + ldwork * *m;
            itauq = ie + *m;
            itaup = itauq + *m;
            nwork = itaup + *m;
            i__1 = *lwork - nwork + 1;
            dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
                    &work[nwork], &i__1, info);
            i__1 = *lwork - nwork + 1;
            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            dlalsd_((char *)"U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
                    &work[nwork], &iwork[1], info, (ftnlen)1);
            if (*info != 0) {
                goto L10;
            }
            i__1 = *lwork - nwork + 1;
            dormbr_((char *)"P", (char *)"L", (char *)"N", m, nrhs, m, &work[il], &ldwork, &work[itaup], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            i__1 = *n - *m;
            dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
            nwork = itau + *m;
            i__1 = *lwork - nwork + 1;
            dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
        } else {
            ie = 1;
            itauq = ie + *m;
            itaup = itauq + *m;
            nwork = itaup + *m;
            i__1 = *lwork - nwork + 1;
            dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
                    &work[nwork], &i__1, info);
            i__1 = *lwork - nwork + 1;
            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            dlalsd_((char *)"L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
                    &work[nwork], &iwork[1], info, (ftnlen)1);
            if (*info != 0) {
                goto L10;
            }
            i__1 = *lwork - nwork + 1;
            dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, m, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
        }
    }
    if (iascl == 1) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
    } else if (iascl == 2) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
    }
    if (ibscl == 1) {
        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
    } else if (ibscl == 2) {
        dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
    }
 L10:
    work[1] = (doublereal)maxwrk;
    iwork[1] = liwork;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgelsd.f
+++ b/lib/linalg/dgelsd.f
@ -1,626 +0,0 @@
 *> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGELSD + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
 *                          WORK, LWORK, IWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
 *       DOUBLE PRECISION   RCOND
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGELSD computes the minimum-norm solution to a real linear least
 *> squares problem:
 *>     minimize 2-norm(| b - A*x |)
 *> using the singular value decomposition (SVD) of A. A is an M-by-N
 *> matrix which may be rank-deficient.
 *>
 *> Several right hand side vectors b and solution vectors x can be
 *> handled in a single call; they are stored as the columns of the
 *> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
 *> matrix X.
 *>
 *> The problem is solved in three steps:
 *> (1) Reduce the coefficient matrix A to bidiagonal form with
 *>     Householder transformations, reducing the original problem
 *>     into a "bidiagonal least squares problem" (BLS)
 *> (2) Solve the BLS using a divide and conquer approach.
 *> (3) Apply back all the Householder transformations to solve
 *>     the original least squares problem.
 *>
 *> The effective rank of A is determined by treating as zero those
 *> singular values which are less than RCOND times the largest singular
 *> value.
 *>
 *> The divide and conquer algorithm makes very mild assumptions about
 *> floating point arithmetic. It will work on machines with a guard
 *> digit in add/subtract, or on those binary machines without guard
 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
 *> without guard digits, but we know of none.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of A. M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of A. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of right hand sides, i.e., the number of columns
 *>          of the matrices B and X. NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix A.
 *>          On exit, A has been destroyed.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[in,out] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
 *>          On entry, the M-by-NRHS right hand side matrix B.
 *>          On exit, B is overwritten by the N-by-NRHS solution
 *>          matrix X.  If m >= n and RANK = n, the residual
 *>          sum-of-squares for the solution in the i-th column is given
 *>          by the sum of squares of elements n+1:m in that column.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
 *> \endverbatim
 *>
 *> \param[out] S
 *> \verbatim
 *>          S is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The singular values of A in decreasing order.
 *>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
 *> \endverbatim
 *>
 *> \param[in] RCOND
 *> \verbatim
 *>          RCOND is DOUBLE PRECISION
 *>          RCOND is used to determine the effective rank of A.
 *>          Singular values S(i) <= RCOND*S(1) are treated as zero.
 *>          If RCOND < 0, machine precision is used instead.
 *> \endverbatim
 *>
 *> \param[out] RANK
 *> \verbatim
 *>          RANK is INTEGER
 *>          The effective rank of A, i.e., the number of singular values
 *>          which are greater than RCOND*S(1).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK. LWORK must be at least 1.
 *>          The exact minimum amount of workspace needed depends on M,
 *>          N and NRHS. As long as LWORK is at least
 *>              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
 *>          if M is greater than or equal to N or
 *>              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
 *>          if M is less than N, the code will execute correctly.
 *>          SMLSIZ is returned by ILAENV and is equal to the maximum
 *>          size of the subproblems at the bottom of the computation
 *>          tree (usually about 25), and
 *>             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
 *>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
 *>          LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
 *>          where MINMN = MIN( M,N ).
 *>          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *>          > 0:  the algorithm for computing the SVD failed to converge;
 *>                if INFO = i, i off-diagonal elements of an intermediate
 *>                bidiagonal form did not converge to zero.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEsolve
 *
 *> \par Contributors:
 *  ==================
 *>
 *>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
 *>       California at Berkeley, USA \n
 *>     Osni Marques, LBNL/NERSC, USA \n
 *
 *  =====================================================================
      SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
     $                   WORK, LWORK, IWORK, INFO )
 *
 *  -- LAPACK driver routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
      DOUBLE PRECISION   RCOND
 *     ..
 *     .. Array Arguments ..
      INTEGER            IWORK( * )
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE, TWO
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
     $                   LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
     $                   MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
     $                   DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      DOUBLE PRECISION   DLAMCH, DLANGE
      EXTERNAL           ILAENV, DLAMCH, DLANGE
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          DBLE, INT, LOG, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments.
 *
      INFO = 0
      MINMN = MIN( M, N )
      MAXMN = MAX( M, N )
      MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -5
      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
         INFO = -7
      END IF
 *
      SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
 *
 *     Compute workspace.
 *     (Note: Comments in the code beginning "Workspace:" describe the
 *     minimal amount of workspace needed at that point in the code,
 *     as well as the preferred amount for good performance.
 *     NB refers to the optimal block size for the immediately
 *     following subroutine, as returned by ILAENV.)
 *
      MINWRK = 1
      LIWORK = 1
      MINMN = MAX( 1, MINMN )
      NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
     $       LOG( TWO ) ) + 1, 0 )
 *
      IF( INFO.EQ.0 ) THEN
         MAXWRK = 0
         LIWORK = 3*MINMN*NLVL + 11*MINMN
         MM = M
         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
 *
 *           Path 1a - overdetermined, with many more rows than columns.
 *
            MM = N
            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
     $               -1, -1 ) )
            MAXWRK = MAX( MAXWRK, N+NRHS*
     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
         END IF
         IF( M.GE.N ) THEN
 *
 *           Path 1 - overdetermined or exactly determined.
 *
            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
     $               ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
         END IF
         IF( N.GT.M ) THEN
            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
            IF( N.GE.MNTHR ) THEN
 *
 *              Path 2a - underdetermined, with many more columns
 *              than rows.
 *
               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
     $                  ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
               IF( NRHS.GT.1 ) THEN
                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
               ELSE
                  MAXWRK = MAX( MAXWRK, M*M+2*M )
               END IF
               MAXWRK = MAX( MAXWRK, M+NRHS*
     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
 !     XXX: Ensure the Path 2a case below is triggered.  The workspace
 !     calculation should use queries for all routines eventually.
               MAXWRK = MAX( MAXWRK,
     $              4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
            ELSE
 *
 *              Path 2 - remaining underdetermined cases.
 *
               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
     $                  -1, -1 )
               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
               MAXWRK = MAX( MAXWRK, 3*M+M*
     $                  ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
            END IF
            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
         END IF
         MINWRK = MIN( MINWRK, MAXWRK )
         WORK( 1 ) = MAXWRK
         IWORK( 1 ) = LIWORK
         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
            INFO = -12
         END IF
      END IF
 *
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGELSD', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         GO TO 10
      END IF
 *
 *     Quick return if possible.
 *
      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
         RANK = 0
         RETURN
      END IF
 *
 *     Get machine parameters.
 *
      EPS = DLAMCH( 'P' )
      SFMIN = DLAMCH( 'S' )
      SMLNUM = SFMIN / EPS
      BIGNUM = ONE / SMLNUM
      CALL DLABAD( SMLNUM, BIGNUM )
 *
 *     Scale A if max entry outside range [SMLNUM,BIGNUM].
 *
      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
      IASCL = 0
      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
 *
 *        Scale matrix norm up to SMLNUM.
 *
         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
         IASCL = 1
      ELSE IF( ANRM.GT.BIGNUM ) THEN
 *
 *        Scale matrix norm down to BIGNUM.
 *
         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
         IASCL = 2
      ELSE IF( ANRM.EQ.ZERO ) THEN
 *
 *        Matrix all zero. Return zero solution.
 *
         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
         RANK = 0
         GO TO 10
      END IF
 *
 *     Scale B if max entry outside range [SMLNUM,BIGNUM].
 *
      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
      IBSCL = 0
      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
 *
 *        Scale matrix norm up to SMLNUM.
 *
         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
         IBSCL = 1
      ELSE IF( BNRM.GT.BIGNUM ) THEN
 *
 *        Scale matrix norm down to BIGNUM.
 *
         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
         IBSCL = 2
      END IF
 *
 *     If M < N make sure certain entries of B are zero.
 *
      IF( M.LT.N )
     $   CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
 *
 *     Overdetermined case.
 *
      IF( M.GE.N ) THEN
 *
 *        Path 1 - overdetermined or exactly determined.
 *
         MM = M
         IF( M.GE.MNTHR ) THEN
 *
 *           Path 1a - overdetermined, with many more rows than columns.
 *
            MM = N
            ITAU = 1
            NWORK = ITAU + N
 *
 *           Compute A=Q*R.
 *           (Workspace: need 2*N, prefer N+N*NB)
 *
            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
     $                   LWORK-NWORK+1, INFO )
 *
 *           Multiply B by transpose(Q).
 *           (Workspace: need N+NRHS, prefer N+NRHS*NB)
 *
            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
 *           Zero out below R.
 *
            IF( N.GT.1 ) THEN
               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
            END IF
         END IF
 *
         IE = 1
         ITAUQ = IE + N
         ITAUP = ITAUQ + N
         NWORK = ITAUP + N
 *
 *        Bidiagonalize R in A.
 *        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
 *
         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
     $                INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors of R.
 *        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
 *        Solve the bidiagonal least squares problem.
 *
         CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
         IF( INFO.NE.0 ) THEN
            GO TO 10
         END IF
 *
 *        Multiply B by right bidiagonalizing vectors of R.
 *
         CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
     $         MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
 *
 *        Path 2a - underdetermined, with many more columns than rows
 *        and sufficient workspace for an efficient algorithm.
 *
         LDWORK = M
         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
     $       M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
         ITAU = 1
         NWORK = M + 1
 *
 *        Compute A=L*Q.
 *        (Workspace: need 2*M, prefer M+M*NB)
 *
         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
     $                LWORK-NWORK+1, INFO )
         IL = NWORK
 *
 *        Copy L to WORK(IL), zeroing out above its diagonal.
 *
         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
     $                LDWORK )
         IE = IL + LDWORK*M
         ITAUQ = IE + M
         ITAUP = ITAUQ + M
         NWORK = ITAUP + M
 *
 *        Bidiagonalize L in WORK(IL).
 *        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
 *
         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
     $                LWORK-NWORK+1, INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors of L.
 *        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
     $                LWORK-NWORK+1, INFO )
 *
 *        Solve the bidiagonal least squares problem.
 *
         CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
         IF( INFO.NE.0 ) THEN
            GO TO 10
         END IF
 *
 *        Multiply B by right bidiagonalizing vectors of L.
 *
         CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
     $                LWORK-NWORK+1, INFO )
 *
 *        Zero out below first M rows of B.
 *
         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
         NWORK = ITAU + M
 *
 *        Multiply transpose(Q) by B.
 *        (Workspace: need M+NRHS, prefer M+NRHS*NB)
 *
         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
      ELSE
 *
 *        Path 2 - remaining underdetermined cases.
 *
         IE = 1
         ITAUQ = IE + M
         ITAUP = ITAUQ + M
         NWORK = ITAUP + M
 *
 *        Bidiagonalize A.
 *        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
 *
         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
     $                INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors.
 *        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
 *        Solve the bidiagonal least squares problem.
 *
         CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
         IF( INFO.NE.0 ) THEN
            GO TO 10
         END IF
 *
 *        Multiply B by right bidiagonalizing vectors of A.
 *
         CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
 *
      END IF
 *
 *     Undo scaling.
 *
      IF( IASCL.EQ.1 ) THEN
         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
     $                INFO )
      ELSE IF( IASCL.EQ.2 ) THEN
         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
     $                INFO )
      END IF
      IF( IBSCL.EQ.1 ) THEN
         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
      ELSE IF( IBSCL.EQ.2 ) THEN
         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
      END IF
 *
   10 CONTINUE
      WORK( 1 ) = MAXWRK
      IWORK( 1 ) = LIWORK
      RETURN
 *
 *     End of DGELSD
 *
      END
--- a/lib/linalg/dgelss.cpp
+++ b/lib/linalg/dgelss.cpp
@ -0,0 +1,466 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__6 = 6;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static doublereal c_b46 = 0.;
 static integer c__1 = 1;
 static doublereal c_b79 = 1.;
 int dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
            integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
            integer *lwork, integer *info)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublereal d__1;
    integer i__, bl, ie, il, mm;
    doublereal dum[1], eps, thr, anrm, bnrm;
    integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, lwork_dorgbr__, lwork_dormbr__,
        lwork_dormlq__, lwork_dormqr__;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen);
    integer iascl, ibscl;
    extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *,
                      doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen),
        drscl_(integer *, doublereal *, doublereal *, integer *);
    integer chunk;
    doublereal sfmin;
    integer minmn;
    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    integer maxmn, itaup, itauq, mnthr, iwork;
    extern int dlabad_(doublereal *, doublereal *),
        dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                doublereal *, doublereal *, doublereal *, integer *, integer *);
    extern doublereal dlamch_(char *, ftnlen),
        dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
    integer bdspac;
    extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       integer *, integer *),
        dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
                doublereal *, integer *, integer *, ftnlen),
        dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                integer *, integer *),
        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
                ftnlen),
        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
                ftnlen),
        xerbla_(char *, integer *, ftnlen),
        dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *,
                doublereal *, integer *, doublereal *, integer *, doublereal *, integer *,
                doublereal *, integer *, ftnlen),
        dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *,
                doublereal *, integer *, integer *, ftnlen);
    doublereal bignum;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
                       integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
                       integer *, ftnlen, ftnlen, ftnlen),
        dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
                doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen,
                ftnlen);
    integer ldwork;
    extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
                       ftnlen, ftnlen);
    integer minwrk, maxwrk;
    doublereal smlnum;
    logical lquery;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --s;
    --work;
    *info = 0;
    minmn = min(*m, *n);
    maxmn = max(*m, *n);
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*nrhs < 0) {
        *info = -3;
    } else if (*lda < max(1, *m)) {
        *info = -5;
    } else if (*ldb < max(1, maxmn)) {
        *info = -7;
    }
    if (*info == 0) {
        minwrk = 1;
        maxwrk = 1;
        if (minmn > 0) {
            mm = *m;
            mnthr = ilaenv_(&c__6, (char *)"DGELSS", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
            if (*m >= *n && *m >= mnthr) {
                dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
                lwork_dgeqrf__ = (integer)dum[0];
                dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum, &c_n1,
                        info, (ftnlen)1, (ftnlen)1);
                lwork_dormqr__ = (integer)dum[0];
                mm = *n;
                i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__;
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *n + lwork_dormqr__;
                maxwrk = max(i__1, i__2);
            }
            if (*m >= *n) {
                i__1 = 1, i__2 = *n * 5;
                bdspac = max(i__1, i__2);
                dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
                lwork_dgebrd__ = (integer)dum[0];
                dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
                        &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
                lwork_dormbr__ = (integer)dum[0];
                dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
                lwork_dorgbr__ = (integer)dum[0];
                i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__;
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__;
                maxwrk = max(i__1, i__2);
                i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__;
                maxwrk = max(i__1, i__2);
                maxwrk = max(maxwrk, bdspac);
                i__1 = maxwrk, i__2 = *n * *nrhs;
                maxwrk = max(i__1, i__2);
                i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2);
                minwrk = max(i__1, bdspac);
                maxwrk = max(minwrk, maxwrk);
            }
            if (*n > *m) {
                i__1 = 1, i__2 = *m * 5;
                bdspac = max(i__1, i__2);
                i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, i__2);
                minwrk = max(i__1, bdspac);
                if (*n >= mnthr) {
                    dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
                    lwork_dgelqf__ = (integer)dum[0];
                    dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
                    lwork_dgebrd__ = (integer)dum[0];
                    dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb,
                            dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
                    lwork_dormbr__ = (integer)dum[0];
                    dorgbr_((char *)"P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
                    lwork_dorgbr__ = (integer)dum[0];
                    dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
                            &c_n1, info, (ftnlen)1, (ftnlen)1);
                    lwork_dormlq__ = (integer)dum[0];
                    maxwrk = *m + lwork_dgelqf__;
                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dgebrd__;
                    maxwrk = max(i__1, i__2);
                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dormbr__;
                    maxwrk = max(i__1, i__2);
                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dorgbr__;
                    maxwrk = max(i__1, i__2);
                    i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
                    maxwrk = max(i__1, i__2);
                    if (*nrhs > 1) {
                        i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
                        maxwrk = max(i__1, i__2);
                    } else {
                        i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
                        maxwrk = max(i__1, i__2);
                    }
                    i__1 = maxwrk, i__2 = *m + lwork_dormlq__;
                    maxwrk = max(i__1, i__2);
                } else {
                    dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
                    lwork_dgebrd__ = (integer)dum[0];
                    dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb,
                            dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
                    lwork_dormbr__ = (integer)dum[0];
                    dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
                    lwork_dorgbr__ = (integer)dum[0];
                    maxwrk = *m * 3 + lwork_dgebrd__;
                    i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__;
                    maxwrk = max(i__1, i__2);
                    i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__;
                    maxwrk = max(i__1, i__2);
                    maxwrk = max(maxwrk, bdspac);
                    i__1 = maxwrk, i__2 = *n * *nrhs;
                    maxwrk = max(i__1, i__2);
                }
            }
            maxwrk = max(minwrk, maxwrk);
        }
        work[1] = (doublereal)maxwrk;
        if (*lwork < minwrk && !lquery) {
            *info = -12;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGELSS", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        return 0;
    }
    if (*m == 0 || *n == 0) {
        *rank = 0;
        return 0;
    }
    eps = dlamch_((char *)"P", (ftnlen)1);
    sfmin = dlamch_((char *)"S", (ftnlen)1);
    smlnum = sfmin / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
        iascl = 1;
    } else if (anrm > bignum) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
        iascl = 2;
    } else if (anrm == 0.) {
        i__1 = max(*m, *n);
        dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb, (ftnlen)1);
        dlaset_((char *)"F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn, (ftnlen)1);
        *rank = 0;
        goto L70;
    }
    bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {
        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        ibscl = 1;
    } else if (bnrm > bignum) {
        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        ibscl = 2;
    }
    if (*m >= *n) {
        mm = *m;
        if (*m >= mnthr) {
            mm = *n;
            itau = 1;
            iwork = itau + *n;
            i__1 = *lwork - iwork + 1;
            dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info);
            i__1 = *lwork - iwork + 1;
            dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
            if (*n > 1) {
                i__1 = *n - 1;
                i__2 = *n - 1;
                dlaset_((char *)"L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], lda, (ftnlen)1);
            }
        }
        ie = 1;
        itauq = ie + *n;
        itaup = itauq + *n;
        iwork = itaup + *n;
        i__1 = *lwork - iwork + 1;
        dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
                &work[iwork], &i__1, info);
        i__1 = *lwork - iwork + 1;
        dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
                &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
        i__1 = *lwork - iwork + 1;
        dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
                (ftnlen)1);
        iwork = ie + *n;
        dbdsqr_((char *)"U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
                &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
        if (*info != 0) {
            goto L70;
        }
        d__1 = *rcond * s[1];
        thr = max(d__1, sfmin);
        if (*rcond < 0.) {
            d__1 = eps * s[1];
            thr = max(d__1, sfmin);
        }
        *rank = 0;
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            if (s[i__] > thr) {
                drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
                ++(*rank);
            } else {
                dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
            }
        }
        if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
            dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
                   &work[1], ldb, (ftnlen)1, (ftnlen)1);
            dlacpy_((char *)"G", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
        } else if (*nrhs > 1) {
            chunk = *lwork / *n;
            i__1 = *nrhs;
            i__2 = chunk;
            for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
                i__3 = *nrhs - i__ + 1;
                bl = min(i__3, chunk);
                dgemm_((char *)"T", (char *)"N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1], ldb,
                       &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
                dlacpy_((char *)"G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
            }
        } else {
            dgemv_((char *)"T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
                   &c__1, (ftnlen)1);
            dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
        }
    } else {
        i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2, i__1), i__2 = max(i__2, *nrhs),
        i__1 = *n - *m * 3;
        if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2, i__1)) {
            ldwork = *m;
            i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
            i__4 = *n - *m * 3;
            i__2 = (*m << 2) + *m * *lda + max(i__3, i__4), i__1 = *m * *lda + *m + *m * *nrhs;
            if (*lwork >= max(i__2, i__1)) {
                ldwork = *lda;
            }
            itau = 1;
            iwork = *m + 1;
            i__2 = *lwork - iwork + 1;
            dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info);
            il = iwork;
            dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
            i__2 = *m - 1;
            i__1 = *m - 1;
            dlaset_((char *)"U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], &ldwork, (ftnlen)1);
            ie = il + ldwork * *m;
            itauq = ie + *m;
            itaup = itauq + *m;
            iwork = itaup + *m;
            i__2 = *lwork - iwork + 1;
            dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
                    &work[iwork], &i__2, info);
            i__2 = *lwork - iwork + 1;
            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
                    &work[iwork], &i__2, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            i__2 = *lwork - iwork + 1;
            dorgbr_((char *)"P", m, m, m, &work[il], &ldwork, &work[itaup], &work[iwork], &i__2, info,
                    (ftnlen)1);
            iwork = ie + *m;
            dbdsqr_((char *)"U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &ldwork, &a[a_offset], lda,
                    &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
            if (*info != 0) {
                goto L70;
            }
            d__1 = *rcond * s[1];
            thr = max(d__1, sfmin);
            if (*rcond < 0.) {
                d__1 = eps * s[1];
                thr = max(d__1, sfmin);
            }
            *rank = 0;
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                if (s[i__] > thr) {
                    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
                    ++(*rank);
                } else {
                    dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
                }
            }
            iwork = ie;
            if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
                dgemm_((char *)"T", (char *)"N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[b_offset], ldb, &c_b46,
                       &work[iwork], ldb, (ftnlen)1, (ftnlen)1);
                dlacpy_((char *)"G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb, (ftnlen)1);
            } else if (*nrhs > 1) {
                chunk = (*lwork - iwork + 1) / *m;
                i__2 = *nrhs;
                i__1 = chunk;
                for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
                    i__3 = *nrhs - i__ + 1;
                    bl = min(i__3, chunk);
                    dgemm_((char *)"T", (char *)"N", m, &bl, m, &c_b79, &work[il], &ldwork, &b[i__ * b_dim1 + 1],
                           ldb, &c_b46, &work[iwork], m, (ftnlen)1, (ftnlen)1);
                    dlacpy_((char *)"G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
                }
            } else {
                dgemv_((char *)"T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], &c__1, &c_b46,
                       &work[iwork], &c__1, (ftnlen)1);
                dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
            }
            i__1 = *n - *m;
            dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
            iwork = itau + *m;
            i__1 = *lwork - iwork + 1;
            dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
        } else {
            ie = 1;
            itauq = ie + *m;
            itaup = itauq + *m;
            iwork = itaup + *m;
            i__1 = *lwork - iwork + 1;
            dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
                    &work[iwork], &i__1, info);
            i__1 = *lwork - iwork + 1;
            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
            i__1 = *lwork - iwork + 1;
            dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
                    (ftnlen)1);
            iwork = ie + *m;
            dbdsqr_((char *)"L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
                    &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
            if (*info != 0) {
                goto L70;
            }
            d__1 = *rcond * s[1];
            thr = max(d__1, sfmin);
            if (*rcond < 0.) {
                d__1 = eps * s[1];
                thr = max(d__1, sfmin);
            }
            *rank = 0;
            i__1 = *m;
            for (i__ = 1; i__ <= i__1; ++i__) {
                if (s[i__] > thr) {
                    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
                    ++(*rank);
                } else {
                    dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
                }
            }
            if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
                dgemm_((char *)"T", (char *)"N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
                       &work[1], ldb, (ftnlen)1, (ftnlen)1);
                dlacpy_((char *)"F", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
            } else if (*nrhs > 1) {
                chunk = *lwork / *n;
                i__1 = *nrhs;
                i__2 = chunk;
                for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
                    i__3 = *nrhs - i__ + 1;
                    bl = min(i__3, chunk);
                    dgemm_((char *)"T", (char *)"N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1],
                           ldb, &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
                    dlacpy_((char *)"F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
                }
            } else {
                dgemv_((char *)"T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
                       &c__1, (ftnlen)1);
                dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
            }
        }
    }
    if (iascl == 1) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
    } else if (iascl == 2) {
        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
        dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
    }
    if (ibscl == 1) {
        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
    } else if (ibscl == 2) {
        dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
    }
 L70:
    work[1] = (doublereal)maxwrk;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgelss.f
+++ b/lib/linalg/dgelss.f
@ -1,744 +0,0 @@
 *> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGELSS + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
 *                          WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
 *       DOUBLE PRECISION   RCOND
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGELSS computes the minimum norm solution to a real linear least
 *> squares problem:
 *>
 *> Minimize 2-norm(| b - A*x |).
 *>
 *> using the singular value decomposition (SVD) of A. A is an M-by-N
 *> matrix which may be rank-deficient.
 *>
 *> Several right hand side vectors b and solution vectors x can be
 *> handled in a single call; they are stored as the columns of the
 *> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
 *> X.
 *>
 *> The effective rank of A is determined by treating as zero those
 *> singular values which are less than RCOND times the largest singular
 *> value.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A. M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of right hand sides, i.e., the number of columns
 *>          of the matrices B and X. NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix A.
 *>          On exit, the first min(m,n) rows of A are overwritten with
 *>          its right singular vectors, stored rowwise.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[in,out] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
 *>          On entry, the M-by-NRHS right hand side matrix B.
 *>          On exit, B is overwritten by the N-by-NRHS solution
 *>          matrix X.  If m >= n and RANK = n, the residual
 *>          sum-of-squares for the solution in the i-th column is given
 *>          by the sum of squares of elements n+1:m in that column.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
 *> \endverbatim
 *>
 *> \param[out] S
 *> \verbatim
 *>          S is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The singular values of A in decreasing order.
 *>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
 *> \endverbatim
 *>
 *> \param[in] RCOND
 *> \verbatim
 *>          RCOND is DOUBLE PRECISION
 *>          RCOND is used to determine the effective rank of A.
 *>          Singular values S(i) <= RCOND*S(1) are treated as zero.
 *>          If RCOND < 0, machine precision is used instead.
 *> \endverbatim
 *>
 *> \param[out] RANK
 *> \verbatim
 *>          RANK is INTEGER
 *>          The effective rank of A, i.e., the number of singular values
 *>          which are greater than RCOND*S(1).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK. LWORK >= 1, and also:
 *>          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
 *>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *>          > 0:  the algorithm for computing the SVD failed to converge;
 *>                if INFO = i, i off-diagonal elements of an intermediate
 *>                bidiagonal form did not converge to zero.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEsolve
 *
 *  =====================================================================
      SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
     $                   WORK, LWORK, INFO )
 *
 *  -- LAPACK driver routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
      DOUBLE PRECISION   RCOND
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
      INTEGER            LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
     $                   LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
     $                   LWORK_DGELQF
      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   DUM( 1 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
     $                   DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
     $                   DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      DOUBLE PRECISION   DLAMCH, DLANGE
      EXTERNAL           ILAENV, DLAMCH, DLANGE
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
      INFO = 0
      MINMN = MIN( M, N )
      MAXMN = MAX( M, N )
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -5
      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
         INFO = -7
      END IF
 *
 *     Compute workspace
 *      (Note: Comments in the code beginning "Workspace:" describe the
 *       minimal amount of workspace needed at that point in the code,
 *       as well as the preferred amount for good performance.
 *       NB refers to the optimal block size for the immediately
 *       following subroutine, as returned by ILAENV.)
 *
      IF( INFO.EQ.0 ) THEN
         MINWRK = 1
         MAXWRK = 1
         IF( MINMN.GT.0 ) THEN
            MM = M
            MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
            IF( M.GE.N .AND. M.GE.MNTHR ) THEN
 *
 *              Path 1a - overdetermined, with many more rows than
 *                        columns
 *
 *              Compute space needed for DGEQRF
               CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
               LWORK_DGEQRF = INT( DUM(1) )
 *              Compute space needed for DORMQR
               CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
     $                   LDB, DUM(1), -1, INFO )
               LWORK_DORMQR = INT( DUM(1) )
               MM = N
               MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
               MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
            END IF
            IF( M.GE.N ) THEN
 *
 *              Path 1 - overdetermined or exactly determined
 *
 *              Compute workspace needed for DBDSQR
 *
               BDSPAC = MAX( 1, 5*N )
 *              Compute space needed for DGEBRD
               CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
     $                      DUM(1), DUM(1), -1, INFO )
               LWORK_DGEBRD = INT( DUM(1) )
 *              Compute space needed for DORMBR
               CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
     $                B, LDB, DUM(1), -1, INFO )
               LWORK_DORMBR = INT( DUM(1) )
 *              Compute space needed for DORGBR
               CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
     $                   DUM(1), -1, INFO )
               LWORK_DORGBR = INT( DUM(1) )
 *              Compute total workspace needed
               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
               MAXWRK = MAX( MAXWRK, BDSPAC )
               MAXWRK = MAX( MAXWRK, N*NRHS )
               MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
               MAXWRK = MAX( MINWRK, MAXWRK )
            END IF
            IF( N.GT.M ) THEN
 *
 *              Compute workspace needed for DBDSQR
 *
               BDSPAC = MAX( 1, 5*M )
               MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
               IF( N.GE.MNTHR ) THEN
 *
 *                 Path 2a - underdetermined, with many more columns
 *                 than rows
 *
 *                 Compute space needed for DGELQF
                  CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
     $                -1, INFO )
                  LWORK_DGELQF = INT( DUM(1) )
 *                 Compute space needed for DGEBRD
                  CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
     $                      DUM(1), DUM(1), -1, INFO )
                  LWORK_DGEBRD = INT( DUM(1) )
 *                 Compute space needed for DORMBR
                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
     $                DUM(1), B, LDB, DUM(1), -1, INFO )
                  LWORK_DORMBR = INT( DUM(1) )
 *                 Compute space needed for DORGBR
                  CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
     $                   DUM(1), -1, INFO )
                  LWORK_DORGBR = INT( DUM(1) )
 *                 Compute space needed for DORMLQ
                  CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
     $                 B, LDB, DUM(1), -1, INFO )
                  LWORK_DORMLQ = INT( DUM(1) )
 *                 Compute total workspace needed
                  MAXWRK = M + LWORK_DGELQF
                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
                  MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
                  IF( NRHS.GT.1 ) THEN
                     MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
                  ELSE
                     MAXWRK = MAX( MAXWRK, M*M + 2*M )
                  END IF
                  MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
               ELSE
 *
 *                 Path 2 - underdetermined
 *
 *                 Compute space needed for DGEBRD
                  CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
     $                      DUM(1), DUM(1), -1, INFO )
                  LWORK_DGEBRD = INT( DUM(1) )
 *                 Compute space needed for DORMBR
                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
     $                DUM(1), B, LDB, DUM(1), -1, INFO )
                  LWORK_DORMBR = INT( DUM(1) )
 *                 Compute space needed for DORGBR
                  CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
     $                   DUM(1), -1, INFO )
                  LWORK_DORGBR = INT( DUM(1) )
                  MAXWRK = 3*M + LWORK_DGEBRD
                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
                  MAXWRK = MAX( MAXWRK, BDSPAC )
                  MAXWRK = MAX( MAXWRK, N*NRHS )
               END IF
            END IF
            MAXWRK = MAX( MINWRK, MAXWRK )
         END IF
         WORK( 1 ) = MAXWRK
 *
         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
     $      INFO = -12
      END IF
 *
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGELSS', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
         RANK = 0
         RETURN
      END IF
 *
 *     Get machine parameters
 *
      EPS = DLAMCH( 'P' )
      SFMIN = DLAMCH( 'S' )
      SMLNUM = SFMIN / EPS
      BIGNUM = ONE / SMLNUM
      CALL DLABAD( SMLNUM, BIGNUM )
 *
 *     Scale A if max element outside range [SMLNUM,BIGNUM]
 *
      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
      IASCL = 0
      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
 *
 *        Scale matrix norm up to SMLNUM
 *
         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
         IASCL = 1
      ELSE IF( ANRM.GT.BIGNUM ) THEN
 *
 *        Scale matrix norm down to BIGNUM
 *
         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
         IASCL = 2
      ELSE IF( ANRM.EQ.ZERO ) THEN
 *
 *        Matrix all zero. Return zero solution.
 *
         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
         RANK = 0
         GO TO 70
      END IF
 *
 *     Scale B if max element outside range [SMLNUM,BIGNUM]
 *
      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
      IBSCL = 0
      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
 *
 *        Scale matrix norm up to SMLNUM
 *
         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
         IBSCL = 1
      ELSE IF( BNRM.GT.BIGNUM ) THEN
 *
 *        Scale matrix norm down to BIGNUM
 *
         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
         IBSCL = 2
      END IF
 *
 *     Overdetermined case
 *
      IF( M.GE.N ) THEN
 *
 *        Path 1 - overdetermined or exactly determined
 *
         MM = M
         IF( M.GE.MNTHR ) THEN
 *
 *           Path 1a - overdetermined, with many more rows than columns
 *
            MM = N
            ITAU = 1
            IWORK = ITAU + N
 *
 *           Compute A=Q*R
 *           (Workspace: need 2*N, prefer N+N*NB)
 *
            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
     $                   LWORK-IWORK+1, INFO )
 *
 *           Multiply B by transpose(Q)
 *           (Workspace: need N+NRHS, prefer N+NRHS*NB)
 *
            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
 *
 *           Zero out below R
 *
            IF( N.GT.1 )
     $         CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
         END IF
 *
         IE = 1
         ITAUQ = IE + N
         ITAUP = ITAUQ + N
         IWORK = ITAUP + N
 *
 *        Bidiagonalize R in A
 *        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
 *
         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
     $                INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors of R
 *        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
 *
 *        Generate right bidiagonalizing vectors of R in A
 *        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
 *
         CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
         IWORK = IE + N
 *
 *        Perform bidiagonal QR iteration
 *          multiply B by transpose of left singular vectors
 *          compute right singular vectors in A
 *        (Workspace: need BDSPAC)
 *
         CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
     $                1, B, LDB, WORK( IWORK ), INFO )
         IF( INFO.NE.0 )
     $      GO TO 70
 *
 *        Multiply B by reciprocals of singular values
 *
         THR = MAX( RCOND*S( 1 ), SFMIN )
         IF( RCOND.LT.ZERO )
     $      THR = MAX( EPS*S( 1 ), SFMIN )
         RANK = 0
         DO 10 I = 1, N
            IF( S( I ).GT.THR ) THEN
               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
               RANK = RANK + 1
            ELSE
               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
            END IF
   10    CONTINUE
 *
 *        Multiply B by right singular vectors
 *        (Workspace: need N, prefer N*NRHS)
 *
         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
            CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
     $                  WORK, LDB )
            CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
         ELSE IF( NRHS.GT.1 ) THEN
            CHUNK = LWORK / N
            DO 20 I = 1, NRHS, CHUNK
               BL = MIN( NRHS-I+1, CHUNK )
               CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
     $                     LDB, ZERO, WORK, N )
               CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
   20       CONTINUE
         ELSE
            CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
            CALL DCOPY( N, WORK, 1, B, 1 )
         END IF
 *
      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
 *
 *        Path 2a - underdetermined, with many more columns than rows
 *        and sufficient workspace for an efficient algorithm
 *
         LDWORK = M
         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
         ITAU = 1
         IWORK = M + 1
 *
 *        Compute A=L*Q
 *        (Workspace: need 2*M, prefer M+M*NB)
 *
         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
     $                LWORK-IWORK+1, INFO )
         IL = IWORK
 *
 *        Copy L to WORK(IL), zeroing out above it
 *
         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
     $                LDWORK )
         IE = IL + LDWORK*M
         ITAUQ = IE + M
         ITAUP = ITAUQ + M
         IWORK = ITAUP + M
 *
 *        Bidiagonalize L in WORK(IL)
 *        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
 *
         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
     $                LWORK-IWORK+1, INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors of L
 *        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
     $                LWORK-IWORK+1, INFO )
 *
 *        Generate right bidiagonalizing vectors of R in WORK(IL)
 *        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
 *
         CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
         IWORK = IE + M
 *
 *        Perform bidiagonal QR iteration,
 *           computing right singular vectors of L in WORK(IL) and
 *           multiplying B by transpose of left singular vectors
 *        (Workspace: need M*M+M+BDSPAC)
 *
         CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
         IF( INFO.NE.0 )
     $      GO TO 70
 *
 *        Multiply B by reciprocals of singular values
 *
         THR = MAX( RCOND*S( 1 ), SFMIN )
         IF( RCOND.LT.ZERO )
     $      THR = MAX( EPS*S( 1 ), SFMIN )
         RANK = 0
         DO 30 I = 1, M
            IF( S( I ).GT.THR ) THEN
               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
               RANK = RANK + 1
            ELSE
               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
            END IF
   30    CONTINUE
         IWORK = IE
 *
 *        Multiply B by right singular vectors of L in WORK(IL)
 *        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
 *
         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
            CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
            CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
         ELSE IF( NRHS.GT.1 ) THEN
            CHUNK = ( LWORK-IWORK+1 ) / M
            DO 40 I = 1, NRHS, CHUNK
               BL = MIN( NRHS-I+1, CHUNK )
               CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
               CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
     $                      LDB )
   40       CONTINUE
         ELSE
            CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
     $                  1, ZERO, WORK( IWORK ), 1 )
            CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
         END IF
 *
 *        Zero out below first M rows of B
 *
         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
         IWORK = ITAU + M
 *
 *        Multiply transpose(Q) by B
 *        (Workspace: need M+NRHS, prefer M+NRHS*NB)
 *
         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
 *
      ELSE
 *
 *        Path 2 - remaining underdetermined cases
 *
         IE = 1
         ITAUQ = IE + M
         ITAUP = ITAUQ + M
         IWORK = ITAUP + M
 *
 *        Bidiagonalize A
 *        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
 *
         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
     $                INFO )
 *
 *        Multiply B by transpose of left bidiagonalizing vectors
 *        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
 *
         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
 *
 *        Generate right bidiagonalizing vectors in A
 *        (Workspace: need 4*M, prefer 3*M+M*NB)
 *
         CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
         IWORK = IE + M
 *
 *        Perform bidiagonal QR iteration,
 *           computing right singular vectors of A in A and
 *           multiplying B by transpose of left singular vectors
 *        (Workspace: need BDSPAC)
 *
         CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
     $                1, B, LDB, WORK( IWORK ), INFO )
         IF( INFO.NE.0 )
     $      GO TO 70
 *
 *        Multiply B by reciprocals of singular values
 *
         THR = MAX( RCOND*S( 1 ), SFMIN )
         IF( RCOND.LT.ZERO )
     $      THR = MAX( EPS*S( 1 ), SFMIN )
         RANK = 0
         DO 50 I = 1, M
            IF( S( I ).GT.THR ) THEN
               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
               RANK = RANK + 1
            ELSE
               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
            END IF
   50    CONTINUE
 *
 *        Multiply B by right singular vectors of A
 *        (Workspace: need N, prefer N*NRHS)
 *
         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
            CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
     $                  WORK, LDB )
            CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
         ELSE IF( NRHS.GT.1 ) THEN
            CHUNK = LWORK / N
            DO 60 I = 1, NRHS, CHUNK
               BL = MIN( NRHS-I+1, CHUNK )
               CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
     $                     LDB, ZERO, WORK, N )
               CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
   60       CONTINUE
         ELSE
            CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
            CALL DCOPY( N, WORK, 1, B, 1 )
         END IF
      END IF
 *
 *     Undo scaling
 *
      IF( IASCL.EQ.1 ) THEN
         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
     $                INFO )
      ELSE IF( IASCL.EQ.2 ) THEN
         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
     $                INFO )
      END IF
      IF( IBSCL.EQ.1 ) THEN
         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
      ELSE IF( IBSCL.EQ.2 ) THEN
         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
      END IF
 *
   70 CONTINUE
      WORK( 1 ) = MAXWRK
      RETURN
 *
 *     End of DGELSS
 *
      END
--- a/lib/linalg/dgemm.cpp
+++ b/lib/linalg/dgemm.cpp
@ -0,0 +1,173 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dgemm_(char *transa, char *transb, integer *m, integer *n, integer *k, doublereal *alpha,
           doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta,
           doublereal *c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3;
    integer i__, j, l, info;
    logical nota, notb;
    doublereal temp;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    integer nrowa, nrowb;
    extern int xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1);
    notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1);
    if (nota) {
        nrowa = *m;
    } else {
        nrowa = *k;
    }
    if (notb) {
        nrowb = *k;
    } else {
        nrowb = *n;
    }
    info = 0;
    if (!nota && !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1) &&
        !lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1)) {
        info = 1;
    } else if (!notb && !lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1) &&
               !lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen)1)) {
        info = 2;
    } else if (*m < 0) {
        info = 3;
    } else if (*n < 0) {
        info = 4;
    } else if (*k < 0) {
        info = 5;
    } else if (*lda < max(1, nrowa)) {
        info = 8;
    } else if (*ldb < max(1, nrowb)) {
        info = 10;
    } else if (*ldc < max(1, *m)) {
        info = 13;
    }
    if (info != 0) {
        xerbla_((char *)"DGEMM ", &info, (ftnlen)6);
        return 0;
    }
    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
        return 0;
    }
    if (*alpha == 0.) {
        if (*beta == 0.) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    c__[i__ + j * c_dim1] = 0.;
                }
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
                }
            }
        }
        return 0;
    }
    if (notb) {
        if (nota) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (*beta == 0.) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c__[i__ + j * c_dim1] = 0.;
                    }
                } else if (*beta != 1.) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
                    }
                }
                i__2 = *k;
                for (l = 1; l <= i__2; ++l) {
                    temp = *alpha * b[l + j * b_dim1];
                    i__3 = *m;
                    for (i__ = 1; i__ <= i__3; ++i__) {
                        c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
                    }
                }
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp = 0.;
                    i__3 = *k;
                    for (l = 1; l <= i__3; ++l) {
                        temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
                    }
                    if (*beta == 0.) {
                        c__[i__ + j * c_dim1] = *alpha * temp;
                    } else {
                        c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
                    }
                }
            }
        }
    } else {
        if (nota) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                if (*beta == 0.) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c__[i__ + j * c_dim1] = 0.;
                    }
                } else if (*beta != 1.) {
                    i__2 = *m;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
                    }
                }
                i__2 = *k;
                for (l = 1; l <= i__2; ++l) {
                    temp = *alpha * b[j + l * b_dim1];
                    i__3 = *m;
                    for (i__ = 1; i__ <= i__3; ++i__) {
                        c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
                    }
                }
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp = 0.;
                    i__3 = *k;
                    for (l = 1; l <= i__3; ++l) {
                        temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
                    }
                    if (*beta == 0.) {
                        c__[i__ + j * c_dim1] = *alpha * temp;
                    } else {
                        c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
                    }
                }
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgemm.f
+++ b/lib/linalg/dgemm.f
@ -1,379 +0,0 @@
 *> \brief \b DGEMM
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER K,LDA,LDB,LDC,M,N
 *       CHARACTER TRANSA,TRANSB
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEMM  performs one of the matrix-matrix operations
 *>
 *>    C := alpha*op( A )*op( B ) + beta*C,
 *>
 *> where  op( X ) is one of
 *>
 *>    op( X ) = X   or   op( X ) = X**T,
 *>
 *> alpha and beta are scalars, and A, B and C are matrices, with op( A )
 *> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] TRANSA
 *> \verbatim
 *>          TRANSA is CHARACTER*1
 *>           On entry, TRANSA specifies the form of op( A ) to be used in
 *>           the matrix multiplication as follows:
 *>
 *>              TRANSA = 'N' or 'n',  op( A ) = A.
 *>
 *>              TRANSA = 'T' or 't',  op( A ) = A**T.
 *>
 *>              TRANSA = 'C' or 'c',  op( A ) = A**T.
 *> \endverbatim
 *>
 *> \param[in] TRANSB
 *> \verbatim
 *>          TRANSB is CHARACTER*1
 *>           On entry, TRANSB specifies the form of op( B ) to be used in
 *>           the matrix multiplication as follows:
 *>
 *>              TRANSB = 'N' or 'n',  op( B ) = B.
 *>
 *>              TRANSB = 'T' or 't',  op( B ) = B**T.
 *>
 *>              TRANSB = 'C' or 'c',  op( B ) = B**T.
 *> \endverbatim
 *>
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>           On entry,  M  specifies  the number  of rows  of the  matrix
 *>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>           On entry,  N  specifies the number  of columns of the matrix
 *>           op( B ) and the number of columns of the matrix C. N must be
 *>           at least zero.
 *> \endverbatim
 *>
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>           On entry,  K  specifies  the number of columns of the matrix
 *>           op( A ) and the number of rows of the matrix op( B ). K must
 *>           be at least  zero.
 *> \endverbatim
 *>
 *> \param[in] ALPHA
 *> \verbatim
 *>          ALPHA is DOUBLE PRECISION.
 *>           On entry, ALPHA specifies the scalar alpha.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
 *>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
 *>           the leading  k by m  part of the array  A  must contain  the
 *>           matrix A.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>           On entry, LDA specifies the first dimension of A as declared
 *>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
 *>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
 *>           least  max( 1, k ).
 *> \endverbatim
 *>
 *> \param[in] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
 *>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
 *>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
 *>           part of the array  B  must contain the matrix  B,  otherwise
 *>           the leading  n by k  part of the array  B  must contain  the
 *>           matrix B.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>           On entry, LDB specifies the first dimension of B as declared
 *>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
 *>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
 *>           least  max( 1, n ).
 *> \endverbatim
 *>
 *> \param[in] BETA
 *> \verbatim
 *>          BETA is DOUBLE PRECISION.
 *>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
 *>           supplied as zero then C need not be set on input.
 *> \endverbatim
 *>
 *> \param[in,out] C
 *> \verbatim
 *>          C is DOUBLE PRECISION array, dimension ( LDC, N )
 *>           Before entry, the leading  m by n  part of the array  C must
 *>           contain the matrix  C,  except when  beta  is zero, in which
 *>           case C need not be set on entry.
 *>           On exit, the array  C  is overwritten by the  m by n  matrix
 *>           ( alpha*op( A )*op( B ) + beta*C ).
 *> \endverbatim
 *>
 *> \param[in] LDC
 *> \verbatim
 *>          LDC is INTEGER
 *>           On entry, LDC specifies the first dimension of C as declared
 *>           in  the  calling  (sub)  program.   LDC  must  be  at  least
 *>           max( 1, m ).
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level3
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  Level 3 Blas routine.
 *>
 *>  -- Written on 8-February-1989.
 *>     Jack Dongarra, Argonne National Laboratory.
 *>     Iain Duff, AERE Harwell.
 *>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
 *>     Sven Hammarling, Numerical Algorithms Group Ltd.
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
 *  -- Reference BLAS level3 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION ALPHA,BETA
      INTEGER K,LDA,LDB,LDC,M,N
      CHARACTER TRANSA,TRANSB
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. External Functions ..
      LOGICAL LSAME
      EXTERNAL LSAME
 *     ..
 *     .. External Subroutines ..
      EXTERNAL XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MAX
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION TEMP
      INTEGER I,INFO,J,L,NROWA,NROWB
      LOGICAL NOTA,NOTB
 *     ..
 *     .. Parameters ..
      DOUBLE PRECISION ONE,ZERO
      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
 *     ..
 *
 *     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
 *     transposed and set  NROWA and NROWB  as the number of rows of  A
 *     and  B  respectively.
 *
      NOTA = LSAME(TRANSA,'N')
      NOTB = LSAME(TRANSB,'N')
      IF (NOTA) THEN
          NROWA = M
      ELSE
          NROWA = K
      END IF
      IF (NOTB) THEN
          NROWB = K
      ELSE
          NROWB = N
      END IF
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
     +    (.NOT.LSAME(TRANSA,'T'))) THEN
          INFO = 1
      ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
     +         (.NOT.LSAME(TRANSB,'T'))) THEN
          INFO = 2
      ELSE IF (M.LT.0) THEN
          INFO = 3
      ELSE IF (N.LT.0) THEN
          INFO = 4
      ELSE IF (K.LT.0) THEN
          INFO = 5
      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
          INFO = 8
      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
          INFO = 10
      ELSE IF (LDC.LT.MAX(1,M)) THEN
          INFO = 13
      END IF
      IF (INFO.NE.0) THEN
          CALL XERBLA('DGEMM ',INFO)
          RETURN
      END IF
 *
 *     Quick return if possible.
 *
      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
 *
 *     And if  alpha.eq.zero.
 *
      IF (ALPHA.EQ.ZERO) THEN
          IF (BETA.EQ.ZERO) THEN
              DO 20 J = 1,N
                  DO 10 I = 1,M
                      C(I,J) = ZERO
   10             CONTINUE
   20         CONTINUE
          ELSE
              DO 40 J = 1,N
                  DO 30 I = 1,M
                      C(I,J) = BETA*C(I,J)
   30             CONTINUE
   40         CONTINUE
          END IF
          RETURN
      END IF
 *
 *     Start the operations.
 *
      IF (NOTB) THEN
          IF (NOTA) THEN
 *
 *           Form  C := alpha*A*B + beta*C.
 *
              DO 90 J = 1,N
                  IF (BETA.EQ.ZERO) THEN
                      DO 50 I = 1,M
                          C(I,J) = ZERO
   50                 CONTINUE
                  ELSE IF (BETA.NE.ONE) THEN
                      DO 60 I = 1,M
                          C(I,J) = BETA*C(I,J)
   60                 CONTINUE
                  END IF
                  DO 80 L = 1,K
                      TEMP = ALPHA*B(L,J)
                      DO 70 I = 1,M
                          C(I,J) = C(I,J) + TEMP*A(I,L)
   70                 CONTINUE
   80             CONTINUE
   90         CONTINUE
          ELSE
 *
 *           Form  C := alpha*A**T*B + beta*C
 *
              DO 120 J = 1,N
                  DO 110 I = 1,M
                      TEMP = ZERO
                      DO 100 L = 1,K
                          TEMP = TEMP + A(L,I)*B(L,J)
  100                 CONTINUE
                      IF (BETA.EQ.ZERO) THEN
                          C(I,J) = ALPHA*TEMP
                      ELSE
                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
                      END IF
  110             CONTINUE
  120         CONTINUE
          END IF
      ELSE
          IF (NOTA) THEN
 *
 *           Form  C := alpha*A*B**T + beta*C
 *
              DO 170 J = 1,N
                  IF (BETA.EQ.ZERO) THEN
                      DO 130 I = 1,M
                          C(I,J) = ZERO
  130                 CONTINUE
                  ELSE IF (BETA.NE.ONE) THEN
                      DO 140 I = 1,M
                          C(I,J) = BETA*C(I,J)
  140                 CONTINUE
                  END IF
                  DO 160 L = 1,K
                      TEMP = ALPHA*B(J,L)
                      DO 150 I = 1,M
                          C(I,J) = C(I,J) + TEMP*A(I,L)
  150                 CONTINUE
  160             CONTINUE
  170         CONTINUE
          ELSE
 *
 *           Form  C := alpha*A**T*B**T + beta*C
 *
              DO 200 J = 1,N
                  DO 190 I = 1,M
                      TEMP = ZERO
                      DO 180 L = 1,K
                          TEMP = TEMP + A(L,I)*B(J,L)
  180                 CONTINUE
                      IF (BETA.EQ.ZERO) THEN
                          C(I,J) = ALPHA*TEMP
                      ELSE
                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
                      END IF
  190             CONTINUE
  200         CONTINUE
          END IF
      END IF
 *
      RETURN
 *
 *     End of DGEMM
 *
      END
--- a/lib/linalg/dgemv.cpp
+++ b/lib/linalg/dgemv.cpp
@ -0,0 +1,149 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dgemv_(char *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda,
           doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy,
           ftnlen trans_len)
 {
    integer a_dim1, a_offset, i__1, i__2;
    integer i__, j, ix, iy, jx, jy, kx, ky, info;
    doublereal temp;
    integer lenx, leny;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern int xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --y;
    info = 0;
    if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
        !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
        info = 1;
    } else if (*m < 0) {
        info = 2;
    } else if (*n < 0) {
        info = 3;
    } else if (*lda < max(1, *m)) {
        info = 6;
    } else if (*incx == 0) {
        info = 8;
    } else if (*incy == 0) {
        info = 11;
    }
    if (info != 0) {
        xerbla_((char *)"DGEMV ", &info, (ftnlen)6);
        return 0;
    }
    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
        return 0;
    }
    if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
        lenx = *n;
        leny = *m;
    } else {
        lenx = *m;
        leny = *n;
    }
    if (*incx > 0) {
        kx = 1;
    } else {
        kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
        ky = 1;
    } else {
        ky = 1 - (leny - 1) * *incy;
    }
    if (*beta != 1.) {
        if (*incy == 1) {
            if (*beta == 0.) {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[i__] = 0.;
                }
            } else {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[i__] = *beta * y[i__];
                }
            }
        } else {
            iy = ky;
            if (*beta == 0.) {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[iy] = 0.;
                    iy += *incy;
                }
            } else {
                i__1 = leny;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    y[iy] = *beta * y[iy];
                    iy += *incy;
                }
            }
        }
    }
    if (*alpha == 0.) {
        return 0;
    }
    if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
        jx = kx;
        if (*incy == 1) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = *alpha * x[jx];
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    y[i__] += temp * a[i__ + j * a_dim1];
                }
                jx += *incx;
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = *alpha * x[jx];
                iy = ky;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    y[iy] += temp * a[i__ + j * a_dim1];
                    iy += *incy;
                }
                jx += *incx;
            }
        }
    } else {
        jy = ky;
        if (*incx == 1) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = 0.;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp += a[i__ + j * a_dim1] * x[i__];
                }
                y[jy] += *alpha * temp;
                jy += *incy;
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                temp = 0.;
                ix = kx;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    temp += a[i__ + j * a_dim1] * x[ix];
                    ix += *incx;
                }
                y[jy] += *alpha * temp;
                jy += *incy;
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgemv.f
+++ b/lib/linalg/dgemv.f
@ -1,327 +0,0 @@
 *> \brief \b DGEMV
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER INCX,INCY,LDA,M,N
 *       CHARACTER TRANS
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEMV  performs one of the matrix-vector operations
 *>
 *>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
 *>
 *> where alpha and beta are scalars, x and y are vectors and A is an
 *> m by n matrix.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>           On entry, TRANS specifies the operation to be performed as
 *>           follows:
 *>
 *>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
 *>
 *>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
 *>
 *>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
 *> \endverbatim
 *>
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>           On entry, M specifies the number of rows of the matrix A.
 *>           M must be at least zero.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>           On entry, N specifies the number of columns of the matrix A.
 *>           N must be at least zero.
 *> \endverbatim
 *>
 *> \param[in] ALPHA
 *> \verbatim
 *>          ALPHA is DOUBLE PRECISION.
 *>           On entry, ALPHA specifies the scalar alpha.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>           On entry, LDA specifies the first dimension of A as declared
 *>           in the calling (sub) program. LDA must be at least
 *>           max( 1, m ).
 *> \endverbatim
 *>
 *> \param[in] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
 *>           Before entry, the incremented array X must contain the
 *>           vector x.
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>           On entry, INCX specifies the increment for the elements of
 *>           X. INCX must not be zero.
 *> \endverbatim
 *>
 *> \param[in] BETA
 *> \verbatim
 *>          BETA is DOUBLE PRECISION.
 *>           On entry, BETA specifies the scalar beta. When BETA is
 *>           supplied as zero then Y need not be set on input.
 *> \endverbatim
 *>
 *> \param[in,out] Y
 *> \verbatim
 *>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
 *>           Before entry with BETA non-zero, the incremented array Y
 *>           must contain the vector y. On exit, Y is overwritten by the
 *>           updated vector y.
 *> \endverbatim
 *>
 *> \param[in] INCY
 *> \verbatim
 *>          INCY is INTEGER
 *>           On entry, INCY specifies the increment for the elements of
 *>           Y. INCY must not be zero.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level2
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  Level 2 Blas routine.
 *>  The vector and matrix arguments are not referenced when N = 0, or M = 0
 *>
 *>  -- Written on 22-October-1986.
 *>     Jack Dongarra, Argonne National Lab.
 *>     Jeremy Du Croz, Nag Central Office.
 *>     Sven Hammarling, Nag Central Office.
 *>     Richard Hanson, Sandia National Labs.
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
 *  -- Reference BLAS level2 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION ALPHA,BETA
      INTEGER INCX,INCY,LDA,M,N
      CHARACTER TRANS
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION ONE,ZERO
      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION TEMP
      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
 *     ..
 *     .. External Functions ..
      LOGICAL LSAME
      EXTERNAL LSAME
 *     ..
 *     .. External Subroutines ..
      EXTERNAL XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MAX
 *     ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
     +    .NOT.LSAME(TRANS,'C')) THEN
          INFO = 1
      ELSE IF (M.LT.0) THEN
          INFO = 2
      ELSE IF (N.LT.0) THEN
          INFO = 3
      ELSE IF (LDA.LT.MAX(1,M)) THEN
          INFO = 6
      ELSE IF (INCX.EQ.0) THEN
          INFO = 8
      ELSE IF (INCY.EQ.0) THEN
          INFO = 11
      END IF
      IF (INFO.NE.0) THEN
          CALL XERBLA('DGEMV ',INFO)
          RETURN
      END IF
 *
 *     Quick return if possible.
 *
      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
 *
 *     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
 *     up the start points in  X  and  Y.
 *
      IF (LSAME(TRANS,'N')) THEN
          LENX = N
          LENY = M
      ELSE
          LENX = M
          LENY = N
      END IF
      IF (INCX.GT.0) THEN
          KX = 1
      ELSE
          KX = 1 - (LENX-1)*INCX
      END IF
      IF (INCY.GT.0) THEN
          KY = 1
      ELSE
          KY = 1 - (LENY-1)*INCY
      END IF
 *
 *     Start the operations. In this version the elements of A are
 *     accessed sequentially with one pass through A.
 *
 *     First form  y := beta*y.
 *
      IF (BETA.NE.ONE) THEN
          IF (INCY.EQ.1) THEN
              IF (BETA.EQ.ZERO) THEN
                  DO 10 I = 1,LENY
                      Y(I) = ZERO
   10             CONTINUE
              ELSE
                  DO 20 I = 1,LENY
                      Y(I) = BETA*Y(I)
   20             CONTINUE
              END IF
          ELSE
              IY = KY
              IF (BETA.EQ.ZERO) THEN
                  DO 30 I = 1,LENY
                      Y(IY) = ZERO
                      IY = IY + INCY
   30             CONTINUE
              ELSE
                  DO 40 I = 1,LENY
                      Y(IY) = BETA*Y(IY)
                      IY = IY + INCY
   40             CONTINUE
              END IF
          END IF
      END IF
      IF (ALPHA.EQ.ZERO) RETURN
      IF (LSAME(TRANS,'N')) THEN
 *
 *        Form  y := alpha*A*x + y.
 *
          JX = KX
          IF (INCY.EQ.1) THEN
              DO 60 J = 1,N
                  TEMP = ALPHA*X(JX)
                  DO 50 I = 1,M
                      Y(I) = Y(I) + TEMP*A(I,J)
   50             CONTINUE
                  JX = JX + INCX
   60         CONTINUE
          ELSE
              DO 80 J = 1,N
                  TEMP = ALPHA*X(JX)
                  IY = KY
                  DO 70 I = 1,M
                      Y(IY) = Y(IY) + TEMP*A(I,J)
                      IY = IY + INCY
   70             CONTINUE
                  JX = JX + INCX
   80         CONTINUE
          END IF
      ELSE
 *
 *        Form  y := alpha*A**T*x + y.
 *
          JY = KY
          IF (INCX.EQ.1) THEN
              DO 100 J = 1,N
                  TEMP = ZERO
                  DO 90 I = 1,M
                      TEMP = TEMP + A(I,J)*X(I)
   90             CONTINUE
                  Y(JY) = Y(JY) + ALPHA*TEMP
                  JY = JY + INCY
  100         CONTINUE
          ELSE
              DO 120 J = 1,N
                  TEMP = ZERO
                  IX = KX
                  DO 110 I = 1,M
                      TEMP = TEMP + A(I,J)*X(IX)
                      IX = IX + INCX
  110             CONTINUE
                  Y(JY) = Y(JY) + ALPHA*TEMP
                  JY = JY + INCY
  120         CONTINUE
          END IF
      END IF
 *
      RETURN
 *
 *     End of DGEMV
 *
      END
--- a/lib/linalg/dgeqr2.cpp
+++ b/lib/linalg/dgeqr2.cpp
@ -0,0 +1,54 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 int dgeqr2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
            integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3;
    integer i__, k;
    doublereal aii;
    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, ftnlen),
        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
        xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGEQR2", &i__1, (ftnlen)6);
        return 0;
    }
    k = min(*m, *n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = *m - i__ + 1;
        i__3 = i__ + 1;
        dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1, &tau[i__]);
        if (i__ < *n) {
            aii = a[i__ + i__ * a_dim1];
            a[i__ + i__ * a_dim1] = 1.;
            i__2 = *m - i__ + 1;
            i__3 = *n - i__;
            dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[i__],
                   &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
            a[i__ + i__ * a_dim1] = aii;
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgeqr2.f
+++ b/lib/linalg/dgeqr2.f
@ -1,198 +0,0 @@
 *> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGEQR2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEQR2 computes a QR factorization of a real m-by-n matrix A:
 *>
 *>    A = Q * ( R ),
 *>            ( 0 )
 *>
 *> where:
 *>
 *>    Q is a m-by-m orthogonal matrix;
 *>    R is an upper-triangular n-by-n matrix;
 *>    0 is a (m-n)-by-n zero matrix, if m > n.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the m by n matrix A.
 *>          On exit, the elements on and above the diagonal of the array
 *>          contain the min(m,n) by n upper trapezoidal matrix R (R is
 *>          upper triangular if m >= n); the elements below the diagonal,
 *>          with the array TAU, represent the orthogonal matrix Q as a
 *>          product of elementary reflectors (see Further Details).
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors (see Further
 *>          Details).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (N)
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0: successful exit
 *>          < 0: if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrix Q is represented as a product of elementary reflectors
 *>
 *>     Q = H(1) H(2) . . . H(k), where k = min(m,n).
 *>
 *>  Each H(i) has the form
 *>
 *>     H(i) = I - tau * v * v**T
 *>
 *>  where tau is a real scalar, and v is a real vector with
 *>  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
 *>  and tau in TAU(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, K
      DOUBLE PRECISION   AII
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLARF, DLARFG, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGEQR2', -INFO )
         RETURN
      END IF
 *
      K = MIN( M, N )
 *
      DO 10 I = 1, K
 *
 *        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
 *
         CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
     $                TAU( I ) )
         IF( I.LT.N ) THEN
 *
 *           Apply H(i) to A(i:m,i+1:n) from the left
 *
            AII = A( I, I )
            A( I, I ) = ONE
            CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
     $                  A( I, I+1 ), LDA, WORK )
            A( I, I ) = AII
         END IF
   10 CONTINUE
      RETURN
 *
 *     End of DGEQR2
 *
      END
--- a/lib/linalg/dgeqrf.cpp
+++ b/lib/linalg/dgeqrf.cpp
@ -0,0 +1,113 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 int dgeqrf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
            integer *lwork, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
    extern int dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       integer *),
        dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
                integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
                integer *, ftnlen, ftnlen, ftnlen, ftnlen),
        dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
                doublereal *, integer *, ftnlen, ftnlen),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    k = min(*m, *n);
    *info = 0;
    nb = ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
    lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    } else if (!lquery) {
        if (*lwork <= 0 || *m > 0 && *lwork < max(1, *n)) {
            *info = -7;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGEQRF", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        if (k == 0) {
            lwkopt = 1;
        } else {
            lwkopt = *n * nb;
        }
        work[1] = (doublereal)lwkopt;
        return 0;
    }
    if (k == 0) {
        work[1] = 1.;
        return 0;
    }
    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {
        i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
        nx = max(i__1, i__2);
        if (nx < k) {
            ldwork = *n;
            iws = ldwork * nb;
            if (*lwork < iws) {
                nb = *lwork / ldwork;
                i__1 = 2,
                i__2 = ilaenv_(&c__2, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
                nbmin = max(i__1, i__2);
            }
        }
    }
    if (nb >= nbmin && nb < k && nx < k) {
        i__1 = k - nx;
        i__2 = nb;
        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
            i__3 = k - i__ + 1;
            ib = min(i__3, nb);
            i__3 = *m - i__ + 1;
            dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
            if (i__ + ib <= *n) {
                i__3 = *m - i__ + 1;
                dlarft_((char *)"Forward", (char *)"Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
                        &work[1], &ldwork, (ftnlen)7, (ftnlen)10);
                i__3 = *m - i__ + 1;
                i__4 = *n - i__ - ib + 1;
                dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib,
                        &a[i__ + i__ * a_dim1], lda, &work[1], &ldwork,
                        &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork, (ftnlen)4,
                        (ftnlen)9, (ftnlen)7, (ftnlen)10);
            }
        }
    } else {
        i__ = 1;
    }
    if (i__ <= k) {
        i__2 = *m - i__ + 1;
        i__1 = *n - i__ + 1;
        dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
    }
    work[1] = (doublereal)iws;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgeqrf.f
+++ b/lib/linalg/dgeqrf.f
@ -1,282 +0,0 @@
 *> \brief \b DGEQRF
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGEQRF + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGEQRF computes a QR factorization of a real M-by-N matrix A:
 *>
 *>    A = Q * ( R ),
 *>            ( 0 )
 *>
 *> where:
 *>
 *>    Q is a M-by-M orthogonal matrix;
 *>    R is an upper-triangular N-by-N matrix;
 *>    0 is a (M-N)-by-N zero matrix, if M > N.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix A.
 *>          On exit, the elements on and above the diagonal of the array
 *>          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
 *>          upper triangular if m >= n); the elements below the diagonal,
 *>          with the array TAU, represent the orthogonal matrix Q as a
 *>          product of min(m,n) elementary reflectors (see Further
 *>          Details).
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors (see Further
 *>          Details).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK.
 *>          LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise.
 *>          For optimum performance LWORK >= N*NB, where NB is
 *>          the optimal blocksize.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrix Q is represented as a product of elementary reflectors
 *>
 *>     Q = H(1) H(2) . . . H(k), where k = min(m,n).
 *>
 *>  Each H(i) has the form
 *>
 *>     H(i) = I - tau * v * v**T
 *>
 *>  where tau is a real scalar, and v is a real vector with
 *>  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
 *>  and tau in TAU(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
     $                   NBMIN, NX
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEQR2, DLARFB, DLARFT, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
      K = MIN( M, N )
      INFO = 0
      NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      ELSE IF( .NOT.LQUERY ) THEN
         IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
     $      INFO = -7
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGEQRF', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         IF( K.EQ.0 ) THEN
            LWKOPT = 1
         ELSE
            LWKOPT = N*NB
         END IF
         WORK( 1 ) = LWKOPT
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( K.EQ.0 ) THEN
         WORK( 1 ) = 1
         RETURN
      END IF
 *
      NBMIN = 2
      NX = 0
      IWS = N
      IF( NB.GT.1 .AND. NB.LT.K ) THEN
 *
 *        Determine when to cross over from blocked to unblocked code.
 *
         NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
         IF( NX.LT.K ) THEN
 *
 *           Determine if workspace is large enough for blocked code.
 *
            LDWORK = N
            IWS = LDWORK*NB
            IF( LWORK.LT.IWS ) THEN
 *
 *              Not enough workspace to use optimal NB:  reduce NB and
 *              determine the minimum value of NB.
 *
               NB = LWORK / LDWORK
               NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
     $                 -1 ) )
            END IF
         END IF
      END IF
 *
      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
 *
 *        Use blocked code initially
 *
         DO 10 I = 1, K - NX, NB
            IB = MIN( K-I+1, NB )
 *
 *           Compute the QR factorization of the current block
 *           A(i:m,i:i+ib-1)
 *
            CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
     $                   IINFO )
            IF( I+IB.LE.N ) THEN
 *
 *              Form the triangular factor of the block reflector
 *              H = H(i) H(i+1) . . . H(i+ib-1)
 *
               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
 *
 *              Apply H**T to A(i:m,i+ib:n) from the left
 *
               CALL DLARFB( 'Left', 'Transpose', 'Forward',
     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
     $                      LDA, WORK( IB+1 ), LDWORK )
            END IF
   10    CONTINUE
      ELSE
         I = 1
      END IF
 *
 *     Use unblocked code to factor the last or only block.
 *
      IF( I.LE.K )
     $   CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
     $                IINFO )
 *
      WORK( 1 ) = IWS
      RETURN
 *
 *     End of DGEQRF
 *
      END
--- a/lib/linalg/dger.cpp
+++ b/lib/linalg/dger.cpp
@ -0,0 +1,77 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y,
          integer *incy, doublereal *a, integer *lda)
 {
    integer a_dim1, a_offset, i__1, i__2;
    integer i__, j, ix, jy, kx, info;
    doublereal temp;
    extern int xerbla_(char *, integer *, ftnlen);
    --x;
    --y;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    info = 0;
    if (*m < 0) {
        info = 1;
    } else if (*n < 0) {
        info = 2;
    } else if (*incx == 0) {
        info = 5;
    } else if (*incy == 0) {
        info = 7;
    } else if (*lda < max(1, *m)) {
        info = 9;
    }
    if (info != 0) {
        xerbla_((char *)"DGER  ", &info, (ftnlen)6);
        return 0;
    }
    if (*m == 0 || *n == 0 || *alpha == 0.) {
        return 0;
    }
    if (*incy > 0) {
        jy = 1;
    } else {
        jy = 1 - (*n - 1) * *incy;
    }
    if (*incx == 1) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            if (y[jy] != 0.) {
                temp = *alpha * y[jy];
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    a[i__ + j * a_dim1] += x[i__] * temp;
                }
            }
            jy += *incy;
        }
    } else {
        if (*incx > 0) {
            kx = 1;
        } else {
            kx = 1 - (*m - 1) * *incx;
        }
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            if (y[jy] != 0.) {
                temp = *alpha * y[jy];
                ix = kx;
                i__2 = *m;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    a[i__ + j * a_dim1] += x[ix] * temp;
                    ix += *incx;
                }
            }
            jy += *incy;
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dger.f
+++ b/lib/linalg/dger.f
@ -1,224 +0,0 @@
 *> \brief \b DGER
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER INCX,INCY,LDA,M,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGER   performs the rank 1 operation
 *>
 *>    A := alpha*x*y**T + A,
 *>
 *> where alpha is a scalar, x is an m element vector, y is an n element
 *> vector and A is an m by n matrix.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>           On entry, M specifies the number of rows of the matrix A.
 *>           M must be at least zero.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>           On entry, N specifies the number of columns of the matrix A.
 *>           N must be at least zero.
 *> \endverbatim
 *>
 *> \param[in] ALPHA
 *> \verbatim
 *>          ALPHA is DOUBLE PRECISION.
 *>           On entry, ALPHA specifies the scalar alpha.
 *> \endverbatim
 *>
 *> \param[in] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the m
 *>           element vector x.
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
 *>           On entry, INCX specifies the increment for the elements of
 *>           X. INCX must not be zero.
 *> \endverbatim
 *>
 *> \param[in] Y
 *> \verbatim
 *>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y.
 *> \endverbatim
 *>
 *> \param[in] INCY
 *> \verbatim
 *>          INCY is INTEGER
 *>           On entry, INCY specifies the increment for the elements of
 *>           Y. INCY must not be zero.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients. On exit, A is
 *>           overwritten by the updated matrix.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>           On entry, LDA specifies the first dimension of A as declared
 *>           in the calling (sub) program. LDA must be at least
 *>           max( 1, m ).
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup double_blas_level2
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  Level 2 Blas routine.
 *>
 *>  -- Written on 22-October-1986.
 *>     Jack Dongarra, Argonne National Lab.
 *>     Jeremy Du Croz, Nag Central Office.
 *>     Sven Hammarling, Nag Central Office.
 *>     Richard Hanson, Sandia National Labs.
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
 *  -- Reference BLAS level2 routine --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION ALPHA
      INTEGER INCX,INCY,LDA,M,N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION ZERO
      PARAMETER (ZERO=0.0D+0)
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION TEMP
      INTEGER I,INFO,IX,J,JY,KX
 *     ..
 *     .. External Subroutines ..
      EXTERNAL XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC MAX
 *     ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF (M.LT.0) THEN
          INFO = 1
      ELSE IF (N.LT.0) THEN
          INFO = 2
      ELSE IF (INCX.EQ.0) THEN
          INFO = 5
      ELSE IF (INCY.EQ.0) THEN
          INFO = 7
      ELSE IF (LDA.LT.MAX(1,M)) THEN
          INFO = 9
      END IF
      IF (INFO.NE.0) THEN
          CALL XERBLA('DGER  ',INFO)
          RETURN
      END IF
 *
 *     Quick return if possible.
 *
      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
 *
 *     Start the operations. In this version the elements of A are
 *     accessed sequentially with one pass through A.
 *
      IF (INCY.GT.0) THEN
          JY = 1
      ELSE
          JY = 1 - (N-1)*INCY
      END IF
      IF (INCX.EQ.1) THEN
          DO 20 J = 1,N
              IF (Y(JY).NE.ZERO) THEN
                  TEMP = ALPHA*Y(JY)
                  DO 10 I = 1,M
                      A(I,J) = A(I,J) + X(I)*TEMP
   10             CONTINUE
              END IF
              JY = JY + INCY
   20     CONTINUE
      ELSE
          IF (INCX.GT.0) THEN
              KX = 1
          ELSE
              KX = 1 - (M-1)*INCX
          END IF
          DO 40 J = 1,N
              IF (Y(JY).NE.ZERO) THEN
                  TEMP = ALPHA*Y(JY)
                  IX = KX
                  DO 30 I = 1,M
                      A(I,J) = A(I,J) + X(IX)*TEMP
                      IX = IX + INCX
   30             CONTINUE
              END IF
              JY = JY + INCY
   40     CONTINUE
      END IF
 *
      RETURN
 *
 *     End of DGER
 *
      END
--- a/lib/linalg/dgesv.cpp
+++ b/lib/linalg/dgesv.cpp
@ -0,0 +1,44 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b,
           integer *ldb, integer *info)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    extern int dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *),
        xerbla_(char *, integer *, ftnlen),
        dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *,
                integer *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    *info = 0;
    if (*n < 0) {
        *info = -1;
    } else if (*nrhs < 0) {
        *info = -2;
    } else if (*lda < max(1, *n)) {
        *info = -4;
    } else if (*ldb < max(1, *n)) {
        *info = -7;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGESV ", &i__1, (ftnlen)6);
        return 0;
    }
    dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
    if (*info == 0) {
        dgetrs_((char *)"No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info,
                (ftnlen)12);
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgesv.f
+++ b/lib/linalg/dgesv.f
@ -1,176 +0,0 @@
 *> \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b>
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGESV + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LDB, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGESV computes the solution to a real system of linear equations
 *>    A * X = B,
 *> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
 *>
 *> The LU decomposition with partial pivoting and row interchanges is
 *> used to factor A as
 *>    A = P * L * U,
 *> where P is a permutation matrix, L is unit lower triangular, and U is
 *> upper triangular.  The factored form of A is then used to solve the
 *> system of equations A * X = B.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of linear equations, i.e., the order of the
 *>          matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of right hand sides, i.e., the number of columns
 *>          of the matrix B.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the N-by-N coefficient matrix A.
 *>          On exit, the factors L and U from the factorization
 *>          A = P*L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (N)
 *>          The pivot indices that define the permutation matrix P;
 *>          row i of the matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[in,out] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
 *>          On entry, the N-by-NRHS matrix of right hand side matrix B.
 *>          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>          The leading dimension of the array B.  LDB >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *>          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
 *>                has been completed, but the factor U is exactly
 *>                singular, so the solution could not be computed.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEsolve
 *
 *  =====================================================================
      SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
 *  -- LAPACK driver routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LDB, N, NRHS
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. External Subroutines ..
      EXTERNAL           DGETRF, DGETRS, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF( N.LT.0 ) THEN
         INFO = -1
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -4
      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
         INFO = -7
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGESV ', -INFO )
         RETURN
      END IF
 *
 *     Compute the LU factorization of A.
 *
      CALL DGETRF( N, N, A, LDA, IPIV, INFO )
      IF( INFO.EQ.0 ) THEN
 *
 *        Solve the system A*X = B, overwriting B with X.
 *
         CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
     $                INFO )
      END IF
      RETURN
 *
 *     End of DGESV
 *
      END
--- a/lib/linalg/dgesvd.cpp
+++ b/lib/linalg/dgesvd.cpp
--- a/lib/linalg/dgesvd.f
+++ b/lib/linalg/dgesvd.f
--- a/lib/linalg/dgetf2.cpp
+++ b/lib/linalg/dgetf2.cpp
@ -0,0 +1,76 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static doublereal c_b8 = -1.;
 int dgetf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    integer i__, j, jp;
    extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
                     integer *, doublereal *, integer *),
        dscal_(integer *, doublereal *, doublereal *, integer *);
    doublereal sfmin;
    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern doublereal dlamch_(char *, ftnlen);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern int xerbla_(char *, integer *, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGETF2", &i__1, (ftnlen)6);
        return 0;
    }
    if (*m == 0 || *n == 0) {
        return 0;
    }
    sfmin = dlamch_((char *)"S", (ftnlen)1);
    i__1 = min(*m, *n);
    for (j = 1; j <= i__1; ++j) {
        i__2 = *m - j + 1;
        jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
        ipiv[j] = jp;
        if (a[jp + j * a_dim1] != 0.) {
            if (jp != j) {
                dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
            }
            if (j < *m) {
                if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) {
                    i__2 = *m - j;
                    d__1 = 1. / a[j + j * a_dim1];
                    dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
                } else {
                    i__2 = *m - j;
                    for (i__ = 1; i__ <= i__2; ++i__) {
                        a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
                    }
                }
            }
        } else if (*info == 0) {
            *info = j;
        }
        if (j < min(*m, *n)) {
            i__2 = *m - j;
            i__3 = *n - j;
            dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (j + 1) * a_dim1], lda,
                  &a[j + 1 + (j + 1) * a_dim1], lda);
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgetf2.f
+++ b/lib/linalg/dgetf2.f
@ -1,210 +0,0 @@
 *> \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm).
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGETF2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGETF2 computes an LU factorization of a general m-by-n matrix A
 *> using partial pivoting with row interchanges.
 *>
 *> The factorization has the form
 *>    A = P * L * U
 *> where P is a permutation matrix, L is lower triangular with unit
 *> diagonal elements (lower trapezoidal if m > n), and U is upper
 *> triangular (upper trapezoidal if m < n).
 *>
 *> This is the right-looking Level 2 BLAS version of the algorithm.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the m by n matrix to be factored.
 *>          On exit, the factors L and U from the factorization
 *>          A = P*L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (min(M,N))
 *>          The pivot indices; for 1 <= i <= min(M,N), row i of the
 *>          matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0: successful exit
 *>          < 0: if INFO = -k, the k-th argument had an illegal value
 *>          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
 *>               has been completed, but the factor U is exactly
 *>               singular, and division by zero will occur if it is used
 *>               to solve a system of equations.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION   SFMIN
      INTEGER            I, J, JP
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      INTEGER            IDAMAX
      EXTERNAL           DLAMCH, IDAMAX
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGER, DSCAL, DSWAP, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGETF2', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( M.EQ.0 .OR. N.EQ.0 )
     $   RETURN
 *
 *     Compute machine safe minimum
 *
      SFMIN = DLAMCH('S')
 *
      DO 10 J = 1, MIN( M, N )
 *
 *        Find pivot and test for singularity.
 *
         JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
         IPIV( J ) = JP
         IF( A( JP, J ).NE.ZERO ) THEN
 *
 *           Apply the interchange to columns 1:N.
 *
            IF( JP.NE.J )
     $         CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
 *
 *           Compute elements J+1:M of J-th column.
 *
            IF( J.LT.M ) THEN
               IF( ABS(A( J, J )) .GE. SFMIN ) THEN
                  CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
               ELSE
                 DO 20 I = 1, M-J
                    A( J+I, J ) = A( J+I, J ) / A( J, J )
   20            CONTINUE
               END IF
            END IF
 *
         ELSE IF( INFO.EQ.0 ) THEN
 *
            INFO = J
         END IF
 *
         IF( J.LT.MIN( M, N ) ) THEN
 *
 *           Update trailing submatrix.
 *
            CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
     $                 A( J+1, J+1 ), LDA )
         END IF
   10 CONTINUE
      RETURN
 *
 *     End of DGETF2
 *
      END
--- a/lib/linalg/dgetrf.cpp
+++ b/lib/linalg/dgetrf.cpp
@ -0,0 +1,90 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static doublereal c_b16 = 1.;
 static doublereal c_b19 = -1.;
 int dgetrf_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    integer i__, j, jb, nb;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen);
    integer iinfo;
    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
                      ftnlen),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    extern int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *,
                       integer *),
        dgetrf2_(integer *, integer *, doublereal *, integer *, integer *, integer *);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGETRF", &i__1, (ftnlen)6);
        return 0;
    }
    if (*m == 0 || *n == 0) {
        return 0;
    }
    nb = ilaenv_(&c__1, (char *)"DGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
    if (nb <= 1 || nb >= min(*m, *n)) {
        dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
    } else {
        i__1 = min(*m, *n);
        i__2 = nb;
        for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
            i__3 = min(*m, *n) - j + 1;
            jb = min(i__3, nb);
            i__3 = *m - j + 1;
            dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
            if (*info == 0 && iinfo > 0) {
                *info = iinfo + j - 1;
            }
            i__4 = *m, i__5 = j + jb - 1;
            i__3 = min(i__4, i__5);
            for (i__ = j; i__ <= i__3; ++i__) {
                ipiv[i__] = j - 1 + ipiv[i__];
            }
            i__3 = j - 1;
            i__4 = j + jb - 1;
            dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
            if (j + jb <= *n) {
                i__3 = *n - j - jb + 1;
                i__4 = j + jb - 1;
                dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1);
                i__3 = *n - j - jb + 1;
                dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b16,
                       &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4,
                       (ftnlen)5, (ftnlen)12, (ftnlen)4);
                if (j + jb <= *m) {
                    i__3 = *m - j - jb + 1;
                    i__4 = *n - j - jb + 1;
                    dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &c_b19,
                           &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16,
                           &a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12);
                }
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgetrf.f
+++ b/lib/linalg/dgetrf.f
@ -1,222 +0,0 @@
 *> \brief \b DGETRF
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGETRF + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGETRF computes an LU factorization of a general M-by-N matrix A
 *> using partial pivoting with row interchanges.
 *>
 *> The factorization has the form
 *>    A = P * L * U
 *> where P is a permutation matrix, L is lower triangular with unit
 *> diagonal elements (lower trapezoidal if m > n), and U is upper
 *> triangular (upper trapezoidal if m < n).
 *>
 *> This is the right-looking Level 3 BLAS version of the algorithm.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix to be factored.
 *>          On exit, the factors L and U from the factorization
 *>          A = P*L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (min(M,N))
 *>          The pivot indices; for 1 <= i <= min(M,N), row i of the
 *>          matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
 *>                has been completed, but the factor U is exactly
 *>                singular, and division by zero will occur if it is used
 *>                to solve a system of equations.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, IINFO, J, JB, NB
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGETRF', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( M.EQ.0 .OR. N.EQ.0 )
     $   RETURN
 *
 *     Determine the block size for this environment.
 *
      NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
 *
 *        Use unblocked code.
 *
         CALL DGETRF2( M, N, A, LDA, IPIV, INFO )
      ELSE
 *
 *        Use blocked code.
 *
         DO 20 J = 1, MIN( M, N ), NB
            JB = MIN( MIN( M, N )-J+1, NB )
 *
 *           Factor diagonal and subdiagonal blocks and test for exact
 *           singularity.
 *
            CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
 *
 *           Adjust INFO and the pivot indices.
 *
            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
     $         INFO = IINFO + J - 1
            DO 10 I = J, MIN( M, J+JB-1 )
               IPIV( I ) = J - 1 + IPIV( I )
   10       CONTINUE
 *
 *           Apply interchanges to columns 1:J-1.
 *
            CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
 *
            IF( J+JB.LE.N ) THEN
 *
 *              Apply interchanges to columns J+JB:N.
 *
               CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
     $                      IPIV, 1 )
 *
 *              Compute block row of U.
 *
               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
     $                     LDA )
               IF( J+JB.LE.M ) THEN
 *
 *                 Update trailing submatrix.
 *
                  CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
     $                        LDA )
               END IF
            END IF
   20    CONTINUE
      END IF
      RETURN
 *
 *     End of DGETRF
 *
      END
--- a/lib/linalg/dgetrf2.cpp
+++ b/lib/linalg/dgetrf2.cpp
@ -0,0 +1,106 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static doublereal c_b13 = 1.;
 static doublereal c_b16 = -1.;
 int dgetrf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1;
    integer i__, n1, n2;
    doublereal temp;
    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
        dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
               integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen,
               ftnlen);
    integer iinfo;
    doublereal sfmin;
    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
                      ftnlen);
    extern doublereal dlamch_(char *, ftnlen);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern int xerbla_(char *, integer *, ftnlen),
        dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1, *m)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGETRF2", &i__1, (ftnlen)7);
        return 0;
    }
    if (*m == 0 || *n == 0) {
        return 0;
    }
    if (*m == 1) {
        ipiv[1] = 1;
        if (a[a_dim1 + 1] == 0.) {
            *info = 1;
        }
    } else if (*n == 1) {
        sfmin = dlamch_((char *)"S", (ftnlen)1);
        i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
        ipiv[1] = i__;
        if (a[i__ + a_dim1] != 0.) {
            if (i__ != 1) {
                temp = a[a_dim1 + 1];
                a[a_dim1 + 1] = a[i__ + a_dim1];
                a[i__ + a_dim1] = temp;
            }
            if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
                i__1 = *m - 1;
                d__1 = 1. / a[a_dim1 + 1];
                dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
            } else {
                i__1 = *m - 1;
                for (i__ = 1; i__ <= i__1; ++i__) {
                    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
                }
            }
        } else {
            *info = 1;
        }
    } else {
        n1 = min(*m, *n) / 2;
        n2 = *n - n1;
        dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
        if (*info == 0 && iinfo > 0) {
            *info = iinfo;
        }
        dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1);
        dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1],
               lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1);
        i__1 = *m - n1;
        dgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda,
               &a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * a_dim1], lda,
               (ftnlen)1, (ftnlen)1);
        i__1 = *m - n1;
        dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo);
        if (*info == 0 && iinfo > 0) {
            *info = iinfo + n1;
        }
        i__1 = min(*m, *n);
        for (i__ = n1 + 1; i__ <= i__1; ++i__) {
            ipiv[i__] += n1;
        }
        i__1 = n1 + 1;
        i__2 = min(*m, *n);
        dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgetrf2.f
+++ b/lib/linalg/dgetrf2.f
@ -1,269 +0,0 @@
 *> \brief \b DGETRF2
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGETRF2 computes an LU factorization of a general M-by-N matrix A
 *> using partial pivoting with row interchanges.
 *>
 *> The factorization has the form
 *>    A = P * L * U
 *> where P is a permutation matrix, L is lower triangular with unit
 *> diagonal elements (lower trapezoidal if m > n), and U is upper
 *> triangular (upper trapezoidal if m < n).
 *>
 *> This is the recursive version of the algorithm. It divides
 *> the matrix into four submatrices:
 *>
 *>        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2
 *>    A = [ -----|----- ]  with n1 = min(m,n)/2
 *>        [  A21 | A22  ]       n2 = n-n1
 *>
 *>                                       [ A11 ]
 *> The subroutine calls itself to factor [ --- ],
 *>                                       [ A12 ]
 *>                 [ A12 ]
 *> do the swaps on [ --- ], solve A12, update A22,
 *>                 [ A22 ]
 *>
 *> then calls itself to factor A22 and do the swaps on A21.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix to be factored.
 *>          On exit, the factors L and U from the factorization
 *>          A = P*L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (min(M,N))
 *>          The pivot indices; for 1 <= i <= min(M,N), row i of the
 *>          matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
 *>                has been completed, but the factor U is exactly
 *>                singular, and division by zero will occur if it is used
 *>                to solve a system of equations.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION   SFMIN, TEMP
      INTEGER            I, IINFO, N1, N2
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      INTEGER            IDAMAX
      EXTERNAL           DLAMCH, IDAMAX
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMM, DSCAL, DLASWP, DTRSM, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGETRF2', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( M.EQ.0 .OR. N.EQ.0 )
     $   RETURN
      IF ( M.EQ.1 ) THEN
 *
 *        Use unblocked code for one row case
 *        Just need to handle IPIV and INFO
 *
         IPIV( 1 ) = 1
         IF ( A(1,1).EQ.ZERO )
     $      INFO = 1
 *
      ELSE IF( N.EQ.1 ) THEN
 *
 *        Use unblocked code for one column case
 *
 *
 *        Compute machine safe minimum
 *
         SFMIN = DLAMCH('S')
 *
 *        Find pivot and test for singularity
 *
         I = IDAMAX( M, A( 1, 1 ), 1 )
         IPIV( 1 ) = I
         IF( A( I, 1 ).NE.ZERO ) THEN
 *
 *           Apply the interchange
 *
            IF( I.NE.1 ) THEN
               TEMP = A( 1, 1 )
               A( 1, 1 ) = A( I, 1 )
               A( I, 1 ) = TEMP
            END IF
 *
 *           Compute elements 2:M of the column
 *
            IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN
               CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
            ELSE
               DO 10 I = 1, M-1
                  A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 )
   10          CONTINUE
            END IF
 *
         ELSE
            INFO = 1
         END IF
 *
      ELSE
 *
 *        Use recursive code
 *
         N1 = MIN( M, N ) / 2
         N2 = N-N1
 *
 *               [ A11 ]
 *        Factor [ --- ]
 *               [ A21 ]
 *
         CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO )
         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
     $      INFO = IINFO
 *
 *                              [ A12 ]
 *        Apply interchanges to [ --- ]
 *                              [ A22 ]
 *
         CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 )
 *
 *        Solve A12
 *
         CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
     $               A( 1, N1+1 ), LDA )
 *
 *        Update A22
 *
         CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
 *
 *        Factor A22
 *
         CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ),
     $                 IINFO )
 *
 *        Adjust INFO and the pivot indices
 *
         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
     $      INFO = IINFO + N1
         DO 20 I = N1+1, MIN( M, N )
            IPIV( I ) = IPIV( I ) + N1
   20    CONTINUE
 *
 *        Apply interchanges to A21
 *
         CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 )
 *
      END IF
      RETURN
 *
 *     End of DGETRF2
 *
      END
--- a/lib/linalg/dgetri.cpp
+++ b/lib/linalg/dgetri.cpp
@ -0,0 +1,125 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static doublereal c_b20 = -1.;
 static doublereal c_b22 = 1.;
 int dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work,
            integer *lwork, integer *info)
 {
    integer a_dim1, a_offset, i__1, i__2, i__3;
    integer i__, j, jb, nb, jj, jp, nn, iws;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen),
        dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
               integer *, doublereal *, doublereal *, integer *, ftnlen);
    integer nbmin;
    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *),
        dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
               integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen),
        xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    integer ldwork;
    extern int dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen,
                       ftnlen);
    integer lwkopt;
    logical lquery;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    --work;
    *info = 0;
    nb = ilaenv_(&c__1, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
    lwkopt = *n * nb;
    work[1] = (doublereal)lwkopt;
    lquery = *lwork == -1;
    if (*n < 0) {
        *info = -1;
    } else if (*lda < max(1, *n)) {
        *info = -3;
    } else if (*lwork < max(1, *n) && !lquery) {
        *info = -6;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGETRI", &i__1, (ftnlen)6);
        return 0;
    } else if (lquery) {
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    dtrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8);
    if (*info > 0) {
        return 0;
    }
    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
        i__1 = ldwork * nb;
        iws = max(i__1, 1);
        if (*lwork < iws) {
            nb = *lwork / ldwork;
            i__1 = 2,
            i__2 = ilaenv_(&c__2, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
            nbmin = max(i__1, i__2);
        }
    } else {
        iws = *n;
    }
    if (nb < nbmin || nb >= *n) {
        for (j = *n; j >= 1; --j) {
            i__1 = *n;
            for (i__ = j + 1; i__ <= i__1; ++i__) {
                work[i__] = a[i__ + j * a_dim1];
                a[i__ + j * a_dim1] = 0.;
            }
            if (j < *n) {
                i__1 = *n - j;
                dgemv_((char *)"No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda,
                       &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1, (ftnlen)12);
            }
        }
    } else {
        nn = (*n - 1) / nb * nb + 1;
        i__1 = -nb;
        for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
            i__2 = nb, i__3 = *n - j + 1;
            jb = min(i__2, i__3);
            i__2 = j + jb - 1;
            for (jj = j; jj <= i__2; ++jj) {
                i__3 = *n;
                for (i__ = jj + 1; i__ <= i__3; ++i__) {
                    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
                    a[i__ + jj * a_dim1] = 0.;
                }
            }
            if (j + jb <= *n) {
                i__2 = *n - j - jb + 1;
                dgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &c_b20,
                       &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b22,
                       &a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12);
            }
            dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b22, &work[j], &ldwork,
                   &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4);
        }
    }
    for (j = *n - 1; j >= 1; --j) {
        jp = ipiv[j];
        if (jp != j) {
            dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
        }
    }
    work[1] = (doublereal)iws;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgetri.f
+++ b/lib/linalg/dgetri.f
@ -1,258 +0,0 @@
 *> \brief \b DGETRI
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGETRI + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, N
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGETRI computes the inverse of a matrix using the LU factorization
 *> computed by DGETRF.
 *>
 *> This method inverts U and then computes inv(A) by solving the system
 *> inv(A)*L = inv(U) for inv(A).
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the factors L and U from the factorization
 *>          A = P*L*U as computed by DGETRF.
 *>          On exit, if INFO = 0, the inverse of the original matrix A.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (N)
 *>          The pivot indices from DGETRF; for 1<=i<=N, row i of the
 *>          matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK.  LWORK >= max(1,N).
 *>          For optimal performance LWORK >= N*NB, where NB is
 *>          the optimal blocksize returned by ILAENV.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *>          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
 *>                singular and its inverse could not be computed.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, N
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
     $                   NBMIN, NN
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
      LWKOPT = N*NB
      WORK( 1 ) = LWKOPT
      LQUERY = ( LWORK.EQ.-1 )
      IF( N.LT.0 ) THEN
         INFO = -1
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -3
      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
         INFO = -6
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGETRI', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
 *     Form inv(U).  If INFO > 0 from DTRTRI, then U is singular,
 *     and the inverse is not computed.
 *
      CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
      IF( INFO.GT.0 )
     $   RETURN
 *
      NBMIN = 2
      LDWORK = N
      IF( NB.GT.1 .AND. NB.LT.N ) THEN
         IWS = MAX( LDWORK*NB, 1 )
         IF( LWORK.LT.IWS ) THEN
            NB = LWORK / LDWORK
            NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
         END IF
      ELSE
         IWS = N
      END IF
 *
 *     Solve the equation inv(A)*L = inv(U) for inv(A).
 *
      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
 *
 *        Use unblocked code.
 *
         DO 20 J = N, 1, -1
 *
 *           Copy current column of L to WORK and replace with zeros.
 *
            DO 10 I = J + 1, N
               WORK( I ) = A( I, J )
               A( I, J ) = ZERO
   10       CONTINUE
 *
 *           Compute current column of inv(A).
 *
            IF( J.LT.N )
     $         CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
   20    CONTINUE
      ELSE
 *
 *        Use blocked code.
 *
         NN = ( ( N-1 ) / NB )*NB + 1
         DO 50 J = NN, 1, -NB
            JB = MIN( NB, N-J+1 )
 *
 *           Copy current block column of L to WORK and replace with
 *           zeros.
 *
            DO 40 JJ = J, J + JB - 1
               DO 30 I = JJ + 1, N
                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
                  A( I, JJ ) = ZERO
   30          CONTINUE
   40       CONTINUE
 *
 *           Compute current block column of inv(A).
 *
            IF( J+JB.LE.N )
     $         CALL DGEMM( 'No transpose', 'No transpose', N, JB,
     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
            CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
   50    CONTINUE
      END IF
 *
 *     Apply column interchanges.
 *
      DO 60 J = N - 1, 1, -1
         JP = IPIV( J )
         IF( JP.NE.J )
     $      CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
   60 CONTINUE
 *
      WORK( 1 ) = IWS
      RETURN
 *
 *     End of DGETRI
 *
      END
--- a/lib/linalg/dgetrs.cpp
+++ b/lib/linalg/dgetrs.cpp
@ -0,0 +1,65 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static doublereal c_b12 = 1.;
 static integer c_n1 = -1;
 int dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv,
            doublereal *b, integer *ldb, integer *info, ftnlen trans_len)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
                      ftnlen),
        xerbla_(char *, integer *, ftnlen),
        dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
    logical notran;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    *info = 0;
    notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1);
    if (!notran && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
        !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*nrhs < 0) {
        *info = -3;
    } else if (*lda < max(1, *n)) {
        *info = -5;
    } else if (*ldb < max(1, *n)) {
        *info = -8;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DGETRS", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0 || *nrhs == 0) {
        return 0;
    }
    if (notran) {
        dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
        dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)4);
        dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)8);
    } else {
        dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)8);
        dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)4);
        dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dgetrs.f
+++ b/lib/linalg/dgetrs.f
@ -1,222 +0,0 @@
 *> \brief \b DGETRS
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DGETRS + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          TRANS
 *       INTEGER            INFO, LDA, LDB, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DGETRS solves a system of linear equations
 *>    A * X = B  or  A**T * X = B
 *> with a general N-by-N matrix A using the LU factorization computed
 *> by DGETRF.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>          Specifies the form of the system of equations:
 *>          = 'N':  A * X = B  (No transpose)
 *>          = 'T':  A**T* X = B  (Transpose)
 *>          = 'C':  A**T* X = B  (Conjugate transpose = Transpose)
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of right hand sides, i.e., the number of columns
 *>          of the matrix B.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The factors L and U from the factorization A = P*L*U
 *>          as computed by DGETRF.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in] IPIV
 *> \verbatim
 *>          IPIV is INTEGER array, dimension (N)
 *>          The pivot indices from DGETRF; for 1<=i<=N, row i of the
 *>          matrix was interchanged with row IPIV(i).
 *> \endverbatim
 *>
 *> \param[in,out] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
 *>          On entry, the right hand side matrix B.
 *>          On exit, the solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>          The leading dimension of the array B.  LDB >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
      SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          TRANS
      INTEGER            INFO, LDA, LDB, N, NRHS
 *     ..
 *     .. Array Arguments ..
      INTEGER            IPIV( * )
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            NOTRAN
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASWP, DTRSM, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      NOTRAN = LSAME( TRANS, 'N' )
      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $    LSAME( TRANS, 'C' ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -5
      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
         INFO = -8
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DGETRS', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 .OR. NRHS.EQ.0 )
     $   RETURN
 *
      IF( NOTRAN ) THEN
 *
 *        Solve A * X = B.
 *
 *        Apply row interchanges to the right hand sides.
 *
         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
 *
 *        Solve L*X = B, overwriting B with X.
 *
         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
     $               ONE, A, LDA, B, LDB )
 *
 *        Solve U*X = B, overwriting B with X.
 *
         CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
     $               NRHS, ONE, A, LDA, B, LDB )
      ELSE
 *
 *        Solve A**T * X = B.
 *
 *        Solve U**T *X = B, overwriting B with X.
 *
         CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
     $               ONE, A, LDA, B, LDB )
 *
 *        Solve L**T *X = B, overwriting B with X.
 *
         CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
     $               A, LDA, B, LDB )
 *
 *        Apply row interchanges to the solution vectors.
 *
         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
      END IF
 *
      RETURN
 *
 *     End of DGETRS
 *
      END
--- a/lib/linalg/disnan.cpp
+++ b/lib/linalg/disnan.cpp
@ -0,0 +1,14 @@
 #include <cmath>
 extern "C" {
 #include "lmp_f2c.h"
 logical disnan_(const doublereal *din)
 {
    if (!din) return TRUE_;
    return std::isnan(*din) ? TRUE_ : FALSE_;
 }
 }
--- a/lib/linalg/disnan.f
+++ b/lib/linalg/disnan.f
@ -1,77 +0,0 @@
 *> \brief \b DISNAN tests input for NaN.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DISNAN + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       LOGICAL FUNCTION DISNAN( DIN )
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION, INTENT(IN) :: DIN
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DISNAN returns .TRUE. if its argument is NaN, and .FALSE.
 *> otherwise.  To be replaced by the Fortran 2003 intrinsic in the
 *> future.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] DIN
 *> \verbatim
 *>          DIN is DOUBLE PRECISION
 *>          Input to test for NaN.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup OTHERauxiliary
 *
 *  =====================================================================
      LOGICAL FUNCTION DISNAN( DIN )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION, INTENT(IN) :: DIN
 *     ..
 *
 *  =====================================================================
 *
 *  .. External Functions ..
      LOGICAL DLAISNAN
      EXTERNAL DLAISNAN
 *  ..
 *  .. Executable Statements ..
      DISNAN = DLAISNAN(DIN,DIN)
      RETURN
      END
--- a/lib/linalg/dlabad.cpp
+++ b/lib/linalg/dlabad.cpp
@ -0,0 +1,16 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlabad_(doublereal *small, doublereal *large)
 {
    double d_lmp_lg10(doublereal *), sqrt(doublereal);
    if (d_lmp_lg10(large) > 2e3) {
        *small = sqrt(*small);
        *large = sqrt(*large);
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlabad.f
+++ b/lib/linalg/dlabad.f
@ -1,102 +0,0 @@
 *> \brief \b DLABAD
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLABAD + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLABAD( SMALL, LARGE )
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   LARGE, SMALL
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLABAD takes as input the values computed by DLAMCH for underflow and
 *> overflow, and returns the square root of each of these values if the
 *> log of LARGE is sufficiently large.  This subroutine is intended to
 *> identify machines with a large exponent range, such as the Crays, and
 *> redefine the underflow and overflow limits to be the square roots of
 *> the values computed by DLAMCH.  This subroutine is needed because
 *> DLAMCH does not compensate for poor arithmetic in the upper half of
 *> the exponent range, as is found on a Cray.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in,out] SMALL
 *> \verbatim
 *>          SMALL is DOUBLE PRECISION
 *>          On entry, the underflow threshold as computed by DLAMCH.
 *>          On exit, if LOG10(LARGE) is sufficiently large, the square
 *>          root of SMALL, otherwise unchanged.
 *> \endverbatim
 *>
 *> \param[in,out] LARGE
 *> \verbatim
 *>          LARGE is DOUBLE PRECISION
 *>          On entry, the overflow threshold as computed by DLAMCH.
 *>          On exit, if LOG10(LARGE) is sufficiently large, the square
 *>          root of LARGE, otherwise unchanged.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup OTHERauxiliary
 *
 *  =====================================================================
      SUBROUTINE DLABAD( SMALL, LARGE )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   LARGE, SMALL
 *     ..
 *
 *  =====================================================================
 *
 *     .. Intrinsic Functions ..
      INTRINSIC          LOG10, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     If it looks like we're on a Cray, take the square root of
 *     SMALL and LARGE to avoid overflow and underflow problems.
 *
      IF( LOG10( LARGE ).GT.2000.D0 ) THEN
         SMALL = SQRT( SMALL )
         LARGE = SQRT( LARGE )
      END IF
 *
      RETURN
 *
 *     End of DLABAD
 *
      END
--- a/lib/linalg/dlabrd.cpp
+++ b/lib/linalg/dlabrd.cpp
@ -0,0 +1,210 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static doublereal c_b4 = -1.;
 static doublereal c_b5 = 1.;
 static integer c__1 = 1;
 static doublereal c_b16 = 0.;
 int dlabrd_(integer *m, integer *n, integer *nb, doublereal *a, integer *lda, doublereal *d__,
            doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx,
            doublereal *y, integer *ldy)
 {
    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3;
    integer i__;
    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
        dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
               integer *, doublereal *, doublereal *, integer *, ftnlen),
        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;
    if (*m <= 0 || *n <= 0) {
        return 0;
    }
    if (*m >= *n) {
        i__1 = *nb;
        for (i__ = 1; i__ <= i__1; ++i__) {
            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1],
                   ldy, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1],
                   &c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
            i__2 = *m - i__ + 1;
            i__3 = i__ + 1;
            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
                    &tauq[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            if (i__ < *n) {
                a[i__ + i__ * a_dim1] = 1.;
                i__2 = *m - i__ + 1;
                i__3 = *n - i__;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda,
                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)9);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
                       (ftnlen)9);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx,
                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
                       (ftnlen)9);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)9);
                i__2 = *n - i__;
                dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *n - i__;
                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy,
                       &a[i__ + a_dim1], lda, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)12);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
                       &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)9);
                i__2 = *n - i__;
                i__3 = i__ + 2;
                dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
                        &taup[i__]);
                e[i__] = a[i__ + (i__ + 1) * a_dim1];
                a[i__ + (i__ + 1) * a_dim1] = 1.;
                i__2 = *m - i__;
                i__3 = *n - i__;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *n - i__;
                dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy,
                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
                       (ftnlen)9);
                i__2 = *m - i__;
                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda,
                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda,
                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
            }
        }
    } else {
        i__1 = *nb;
        for (i__ = 1; i__ <= i__1; ++i__) {
            i__2 = *n - i__ + 1;
            i__3 = i__ - 1;
            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1],
                   lda, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)12);
            i__2 = i__ - 1;
            i__3 = *n - i__ + 1;
            dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1],
                   ldx, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)9);
            i__2 = *n - i__ + 1;
            i__3 = i__ + 1;
            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
                    &taup[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            if (i__ < *m) {
                a[i__ + i__ * a_dim1] = 1.;
                i__2 = *m - i__;
                i__3 = *n - i__ + 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda,
                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *n - i__ + 1;
                i__3 = i__ - 1;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy,
                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1, (ftnlen)9);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = i__ - 1;
                i__3 = *n - i__ + 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda,
                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
                       &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1, (ftnlen)12);
                i__2 = *m - i__;
                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx,
                       &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                i__3 = i__ + 2;
                dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
                        &tauq[i__]);
                e[i__] = a[i__ + 1 + i__ * a_dim1];
                a[i__ + 1 + i__ * a_dim1] = 1.;
                i__2 = *m - i__;
                i__3 = *n - i__;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)9);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda,
                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
                       (ftnlen)9);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)12);
                i__2 = *m - i__;
                dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx,
                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
                       (ftnlen)9);
                i__2 = *n - i__;
                dgemv_((char *)"Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
                       (ftnlen)9);
                i__2 = *n - i__;
                dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlabrd.f
+++ b/lib/linalg/dlabrd.f
@ -1,378 +0,0 @@
 *> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLABRD + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
 *                          LDY )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            LDA, LDX, LDY, M, N, NB
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLABRD reduces the first NB rows and columns of a real general
 *> m by n matrix A to upper or lower bidiagonal form by an orthogonal
 *> transformation Q**T * A * P, and returns the matrices X and Y which
 *> are needed to apply the transformation to the unreduced part of A.
 *>
 *> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
 *> bidiagonal form.
 *>
 *> This is an auxiliary routine called by DGEBRD
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows in the matrix A.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns in the matrix A.
 *> \endverbatim
 *>
 *> \param[in] NB
 *> \verbatim
 *>          NB is INTEGER
 *>          The number of leading rows and columns of A to be reduced.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the m by n general matrix to be reduced.
 *>          On exit, the first NB rows and columns of the matrix are
 *>          overwritten; the rest of the array is unchanged.
 *>          If m >= n, elements on and below the diagonal in the first NB
 *>            columns, with the array TAUQ, represent the orthogonal
 *>            matrix Q as a product of elementary reflectors; and
 *>            elements above the diagonal in the first NB rows, with the
 *>            array TAUP, represent the orthogonal matrix P as a product
 *>            of elementary reflectors.
 *>          If m < n, elements below the diagonal in the first NB
 *>            columns, with the array TAUQ, represent the orthogonal
 *>            matrix Q as a product of elementary reflectors, and
 *>            elements on and above the diagonal in the first NB rows,
 *>            with the array TAUP, represent the orthogonal matrix P as
 *>            a product of elementary reflectors.
 *>          See Further Details.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (NB)
 *>          The diagonal elements of the first NB rows and columns of
 *>          the reduced matrix.  D(i) = A(i,i).
 *> \endverbatim
 *>
 *> \param[out] E
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (NB)
 *>          The off-diagonal elements of the first NB rows and columns of
 *>          the reduced matrix.
 *> \endverbatim
 *>
 *> \param[out] TAUQ
 *> \verbatim
 *>          TAUQ is DOUBLE PRECISION array, dimension (NB)
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] TAUP
 *> \verbatim
 *>          TAUP is DOUBLE PRECISION array, dimension (NB)
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix P. See Further Details.
 *> \endverbatim
 *>
 *> \param[out] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension (LDX,NB)
 *>          The m-by-nb matrix X required to update the unreduced part
 *>          of A.
 *> \endverbatim
 *>
 *> \param[in] LDX
 *> \verbatim
 *>          LDX is INTEGER
 *>          The leading dimension of the array X. LDX >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] Y
 *> \verbatim
 *>          Y is DOUBLE PRECISION array, dimension (LDY,NB)
 *>          The n-by-nb matrix Y required to update the unreduced part
 *>          of A.
 *> \endverbatim
 *>
 *> \param[in] LDY
 *> \verbatim
 *>          LDY is INTEGER
 *>          The leading dimension of the array Y. LDY >= max(1,N).
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The matrices Q and P are represented as products of elementary
 *>  reflectors:
 *>
 *>     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
 *>
 *>  Each H(i) and G(i) has the form:
 *>
 *>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
 *>
 *>  where tauq and taup are real scalars, and v and u are real vectors.
 *>
 *>  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
 *>  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
 *>  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
 *>  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
 *>  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
 *>
 *>  The elements of the vectors v and u together form the m-by-nb matrix
 *>  V and the nb-by-n matrix U**T which are needed, with X and Y, to apply
 *>  the transformation to the unreduced part of the matrix, using a block
 *>  update of the form:  A := A - V*Y**T - X*U**T.
 *>
 *>  The contents of A on exit are illustrated by the following examples
 *>  with nb = 2:
 *>
 *>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
 *>
 *>    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
 *>    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
 *>    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
 *>    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
 *>    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
 *>    (  v1  v2  a   a   a  )
 *>
 *>  where a denotes an element of the original matrix which is unchanged,
 *>  vi denotes an element of the vector defining H(i), and ui an element
 *>  of the vector defining G(i).
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
     $                   LDY )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            LDA, LDX, LDY, M, N, NB
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
     $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMV, DLARFG, DSCAL
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( M.LE.0 .OR. N.LE.0 )
     $   RETURN
 *
      IF( M.GE.N ) THEN
 *
 *        Reduce to upper bidiagonal form
 *
         DO 10 I = 1, NB
 *
 *           Update A(i:m,i)
 *
            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
 *
 *           Generate reflection Q(i) to annihilate A(i+1:m,i)
 *
            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
     $                   TAUQ( I ) )
            D( I ) = A( I, I )
            IF( I.LT.N ) THEN
               A( I, I ) = ONE
 *
 *              Compute Y(i+1:n,i)
 *
               CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
     $                     LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
     $                     LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
 *
 *              Update A(i,i+1:n)
 *
               CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
     $                     LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
 *
 *              Generate reflection P(i) to annihilate A(i,i+2:n)
 *
               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
     $                      LDA, TAUP( I ) )
               E( I ) = A( I, I+1 )
               A( I, I+1 ) = ONE
 *
 *              Compute X(i+1:m,i)
 *
               CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
     $                     A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
               CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
               CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
            END IF
   10    CONTINUE
      ELSE
 *
 *        Reduce to lower bidiagonal form
 *
         DO 20 I = 1, NB
 *
 *           Update A(i,i:n)
 *
            CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
            CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
     $                  X( I, 1 ), LDX, ONE, A( I, I ), LDA )
 *
 *           Generate reflection P(i) to annihilate A(i,i+1:n)
 *
            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
     $                   TAUP( I ) )
            D( I ) = A( I, I )
            IF( I.LT.M ) THEN
               A( I, I ) = ONE
 *
 *              Compute X(i+1:m,i)
 *
               CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
     $                     A( I, I ), LDA, ZERO, X( 1, I ), 1 )
               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
               CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
 *
 *              Update A(i+1:m,i)
 *
               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
               CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
 *
 *              Generate reflection Q(i) to annihilate A(i+2:m,i)
 *
               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
     $                      TAUQ( I ) )
               E( I ) = A( I+1, I )
               A( I+1, I ) = ONE
 *
 *              Compute Y(i+1:n,i)
 *
               CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
     $                     LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
               CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
               CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
     $                     Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
            END IF
   20    CONTINUE
      END IF
      RETURN
 *
 *     End of DLABRD
 *
      END
--- a/lib/linalg/dlacn2.cpp
+++ b/lib/linalg/dlacn2.cpp
@ -0,0 +1,136 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 int dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase,
            integer *isave)
 {
    integer i__1;
    doublereal d__1;
    integer i_lmp_dnnt(doublereal *);
    integer i__;
    doublereal xs, temp;
    extern doublereal dasum_(integer *, doublereal *, integer *);
    integer jlast;
    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern integer idamax_(integer *, doublereal *, integer *);
    doublereal altsgn, estold;
    --isave;
    --isgn;
    --x;
    --v;
    if (*kase == 0) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            x[i__] = 1. / (doublereal)(*n);
        }
        *kase = 1;
        isave[1] = 1;
        return 0;
    }
    switch (isave[1]) {
        case 1:
            goto L20;
        case 2:
            goto L40;
        case 3:
            goto L70;
        case 4:
            goto L110;
        case 5:
            goto L140;
    }
 L20:
    if (*n == 1) {
        v[1] = x[1];
        *est = abs(v[1]);
        goto L150;
    }
    *est = dasum_(n, &x[1], &c__1);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (x[i__] >= 0.) {
            x[i__] = 1.;
        } else {
            x[i__] = -1.;
        }
        isgn[i__] = i_lmp_dnnt(&x[i__]);
    }
    *kase = 2;
    isave[1] = 2;
    return 0;
 L40:
    isave[2] = idamax_(n, &x[1], &c__1);
    isave[3] = 2;
 L50:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        x[i__] = 0.;
    }
    x[isave[2]] = 1.;
    *kase = 1;
    isave[1] = 3;
    return 0;
 L70:
    dcopy_(n, &x[1], &c__1, &v[1], &c__1);
    estold = *est;
    *est = dasum_(n, &v[1], &c__1);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (x[i__] >= 0.) {
            xs = 1.;
        } else {
            xs = -1.;
        }
        if (i_lmp_dnnt(&xs) != isgn[i__]) {
            goto L90;
        }
    }
    goto L120;
 L90:
    if (*est <= estold) {
        goto L120;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (x[i__] >= 0.) {
            x[i__] = 1.;
        } else {
            x[i__] = -1.;
        }
        isgn[i__] = i_lmp_dnnt(&x[i__]);
    }
    *kase = 2;
    isave[1] = 4;
    return 0;
 L110:
    jlast = isave[2];
    isave[2] = idamax_(n, &x[1], &c__1);
    if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) {
        ++isave[3];
        goto L50;
    }
 L120:
    altsgn = 1.;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        x[i__] = altsgn * ((doublereal)(i__ - 1) / (doublereal)(*n - 1) + 1.);
        altsgn = -altsgn;
    }
    *kase = 1;
    isave[1] = 5;
    return 0;
 L140:
    temp = dasum_(n, &x[1], &c__1) / (doublereal)(*n * 3) * 2.;
    if (temp > *est) {
        dcopy_(n, &x[1], &c__1, &v[1], &c__1);
        *est = temp;
    }
 L150:
    *kase = 0;
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlacn2.f
+++ b/lib/linalg/dlacn2.f
@ -1,304 +0,0 @@
 *> \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLACN2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            KASE, N
 *       DOUBLE PRECISION   EST
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            ISGN( * ), ISAVE( 3 )
 *       DOUBLE PRECISION   V( * ), X( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLACN2 estimates the 1-norm of a square, real matrix A.
 *> Reverse communication is used for evaluating matrix-vector products.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         The order of the matrix.  N >= 1.
 *> \endverbatim
 *>
 *> \param[out] V
 *> \verbatim
 *>          V is DOUBLE PRECISION array, dimension (N)
 *>         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
 *>         (W is not returned).
 *> \endverbatim
 *>
 *> \param[in,out] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension (N)
 *>         On an intermediate return, X should be overwritten by
 *>               A * X,   if KASE=1,
 *>               A**T * X,  if KASE=2,
 *>         and DLACN2 must be re-called with all the other parameters
 *>         unchanged.
 *> \endverbatim
 *>
 *> \param[out] ISGN
 *> \verbatim
 *>          ISGN is INTEGER array, dimension (N)
 *> \endverbatim
 *>
 *> \param[in,out] EST
 *> \verbatim
 *>          EST is DOUBLE PRECISION
 *>         On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be
 *>         unchanged from the previous call to DLACN2.
 *>         On exit, EST is an estimate (a lower bound) for norm(A).
 *> \endverbatim
 *>
 *> \param[in,out] KASE
 *> \verbatim
 *>          KASE is INTEGER
 *>         On the initial call to DLACN2, KASE should be 0.
 *>         On an intermediate return, KASE will be 1 or 2, indicating
 *>         whether X should be overwritten by A * X  or A**T * X.
 *>         On the final return from DLACN2, KASE will again be 0.
 *> \endverbatim
 *>
 *> \param[in,out] ISAVE
 *> \verbatim
 *>          ISAVE is INTEGER array, dimension (3)
 *>         ISAVE is used to save variables between calls to DLACN2
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  Originally named SONEST, dated March 16, 1988.
 *>
 *>  This is a thread safe version of DLACON, which uses the array ISAVE
 *>  in place of a SAVE statement, as follows:
 *>
 *>     DLACON     DLACN2
 *>      JUMP     ISAVE(1)
 *>      J        ISAVE(2)
 *>      ITER     ISAVE(3)
 *> \endverbatim
 *
 *> \par Contributors:
 *  ==================
 *>
 *>     Nick Higham, University of Manchester
 *
 *> \par References:
 *  ================
 *>
 *>  N.J. Higham, "FORTRAN codes for estimating the one-norm of
 *>  a real or complex matrix, with applications to condition estimation",
 *>  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
 *>
 *  =====================================================================
      SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            KASE, N
      DOUBLE PRECISION   EST
 *     ..
 *     .. Array Arguments ..
      INTEGER            ISGN( * ), ISAVE( 3 )
      DOUBLE PRECISION   V( * ), X( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      INTEGER            ITMAX
      PARAMETER          ( ITMAX = 5 )
      DOUBLE PRECISION   ZERO, ONE, TWO
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, JLAST
      DOUBLE PRECISION   ALTSGN, ESTOLD, TEMP, XS
 *     ..
 *     .. External Functions ..
      INTEGER            IDAMAX
      DOUBLE PRECISION   DASUM
      EXTERNAL           IDAMAX, DASUM
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, NINT
 *     ..
 *     .. Executable Statements ..
 *
      IF( KASE.EQ.0 ) THEN
         DO 10 I = 1, N
            X( I ) = ONE / DBLE( N )
   10    CONTINUE
         KASE = 1
         ISAVE( 1 ) = 1
         RETURN
      END IF
 *
      GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 )
 *
 *     ................ ENTRY   (ISAVE( 1 ) = 1)
 *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
 *
   20 CONTINUE
      IF( N.EQ.1 ) THEN
         V( 1 ) = X( 1 )
         EST = ABS( V( 1 ) )
 *        ... QUIT
         GO TO 150
      END IF
      EST = DASUM( N, X, 1 )
 *
      DO 30 I = 1, N
         IF( X(I).GE.ZERO ) THEN
            X(I) = ONE
         ELSE
            X(I) = -ONE
         END IF
         ISGN( I ) = NINT( X( I ) )
   30 CONTINUE
      KASE = 2
      ISAVE( 1 ) = 2
      RETURN
 *
 *     ................ ENTRY   (ISAVE( 1 ) = 2)
 *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
 *
   40 CONTINUE
      ISAVE( 2 ) = IDAMAX( N, X, 1 )
      ISAVE( 3 ) = 2
 *
 *     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
 *
   50 CONTINUE
      DO 60 I = 1, N
         X( I ) = ZERO
   60 CONTINUE
      X( ISAVE( 2 ) ) = ONE
      KASE = 1
      ISAVE( 1 ) = 3
      RETURN
 *
 *     ................ ENTRY   (ISAVE( 1 ) = 3)
 *     X HAS BEEN OVERWRITTEN BY A*X.
 *
   70 CONTINUE
      CALL DCOPY( N, X, 1, V, 1 )
      ESTOLD = EST
      EST = DASUM( N, V, 1 )
      DO 80 I = 1, N
         IF( X(I).GE.ZERO ) THEN
            XS = ONE
         ELSE
            XS = -ONE
         END IF
         IF( NINT( XS ).NE.ISGN( I ) )
     $      GO TO 90
   80 CONTINUE
 *     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
      GO TO 120
 *
   90 CONTINUE
 *     TEST FOR CYCLING.
      IF( EST.LE.ESTOLD )
     $   GO TO 120
 *
      DO 100 I = 1, N
         IF( X(I).GE.ZERO ) THEN
            X(I) = ONE
         ELSE
            X(I) = -ONE
         END IF
         ISGN( I ) = NINT( X( I ) )
  100 CONTINUE
      KASE = 2
      ISAVE( 1 ) = 4
      RETURN
 *
 *     ................ ENTRY   (ISAVE( 1 ) = 4)
 *     X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
 *
  110 CONTINUE
      JLAST = ISAVE( 2 )
      ISAVE( 2 ) = IDAMAX( N, X, 1 )
      IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND.
     $    ( ISAVE( 3 ).LT.ITMAX ) ) THEN
         ISAVE( 3 ) = ISAVE( 3 ) + 1
         GO TO 50
      END IF
 *
 *     ITERATION COMPLETE.  FINAL STAGE.
 *
  120 CONTINUE
      ALTSGN = ONE
      DO 130 I = 1, N
         X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) )
         ALTSGN = -ALTSGN
  130 CONTINUE
      KASE = 1
      ISAVE( 1 ) = 5
      RETURN
 *
 *     ................ ENTRY   (ISAVE( 1 ) = 5)
 *     X HAS BEEN OVERWRITTEN BY A*X.
 *
  140 CONTINUE
      TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) )
      IF( TEMP.GT.EST ) THEN
         CALL DCOPY( N, X, 1, V, 1 )
         EST = TEMP
      END IF
 *
  150 CONTINUE
      KASE = 0
      RETURN
 *
 *     End of DLACN2
 *
      END
--- a/lib/linalg/dlacpy.cpp
+++ b/lib/linalg/dlacpy.cpp
@ -0,0 +1,46 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlacpy_(char *uplo, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b,
            integer *ldb, ftnlen uplo_len)
 {
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    integer i__, j;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = min(j, *m);
            for (i__ = 1; i__ <= i__2; ++i__) {
                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
            }
        }
    } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = j; i__ <= i__2; ++i__) {
                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
            }
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
            }
        }
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlacpy.f
+++ b/lib/linalg/dlacpy.f
@ -1,153 +0,0 @@
 *> \brief \b DLACPY copies all or part of one two-dimensional array to another.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLACPY + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, LDB, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLACPY copies all or part of a two-dimensional matrix A to another
 *> matrix B.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          Specifies the part of the matrix A to be copied to B.
 *>          = 'U':      Upper triangular part
 *>          = 'L':      Lower triangular part
 *>          Otherwise:  All of the matrix A
 *> \endverbatim
 *>
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The m by n matrix A.  If UPLO = 'U', only the upper triangle
 *>          or trapezoid is accessed; if UPLO = 'L', only the lower
 *>          triangle or trapezoid is accessed.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] B
 *> \verbatim
 *>          B is DOUBLE PRECISION array, dimension (LDB,N)
 *>          On exit, B = A in the locations specified by UPLO.
 *> \endverbatim
 *>
 *> \param[in] LDB
 *> \verbatim
 *>          LDB is INTEGER
 *>          The leading dimension of the array B.  LDB >= max(1,M).
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup OTHERauxiliary
 *
 *  =====================================================================
      SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            LDA, LDB, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      INTEGER            I, J
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MIN
 *     ..
 *     .. Executable Statements ..
 *
      IF( LSAME( UPLO, 'U' ) ) THEN
         DO 20 J = 1, N
            DO 10 I = 1, MIN( J, M )
               B( I, J ) = A( I, J )
   10       CONTINUE
   20    CONTINUE
      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
         DO 40 J = 1, N
            DO 30 I = J, M
               B( I, J ) = A( I, J )
   30       CONTINUE
   40    CONTINUE
      ELSE
         DO 60 J = 1, N
            DO 50 I = 1, M
               B( I, J ) = A( I, J )
   50       CONTINUE
   60    CONTINUE
      END IF
      RETURN
 *
 *     End of DLACPY
 *
      END
--- a/lib/linalg/dladiv.cpp
+++ b/lib/linalg/dladiv.cpp
@ -0,0 +1,88 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
            doublereal *q)
 {
    doublereal d__1, d__2;
    doublereal s, aa, ab, bb, cc, cd, dd, be, un, ov, eps;
    extern doublereal dlamch_(char *, ftnlen);
    extern int dladiv1_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
                        doublereal *);
    aa = *a;
    bb = *b;
    cc = *c__;
    dd = *d__;
    d__1 = abs(*a), d__2 = abs(*b);
    ab = max(d__1, d__2);
    d__1 = abs(*c__), d__2 = abs(*d__);
    cd = max(d__1, d__2);
    s = 1.;
    ov = dlamch_((char *)"Overflow threshold", (ftnlen)18);
    un = dlamch_((char *)"Safe minimum", (ftnlen)12);
    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
    be = 2. / (eps * eps);
    if (ab >= ov * .5) {
        aa *= .5;
        bb *= .5;
        s *= 2.;
    }
    if (cd >= ov * .5) {
        cc *= .5;
        dd *= .5;
        s *= .5;
    }
    if (ab <= un * 2. / eps) {
        aa *= be;
        bb *= be;
        s /= be;
    }
    if (cd <= un * 2. / eps) {
        cc *= be;
        dd *= be;
        s *= be;
    }
    if (abs(*d__) <= abs(*c__)) {
        dladiv1_(&aa, &bb, &cc, &dd, p, q);
    } else {
        dladiv1_(&bb, &aa, &dd, &cc, p, q);
        *q = -(*q);
    }
    *p *= s;
    *q *= s;
    return 0;
 }
 int dladiv1_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
             doublereal *q)
 {
    doublereal r__, t;
    extern doublereal dladiv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
                               doublereal *);
    r__ = *d__ / *c__;
    t = 1. / (*c__ + *d__ * r__);
    *p = dladiv2_(a, b, c__, d__, &r__, &t);
    *a = -(*a);
    *q = dladiv2_(b, a, c__, d__, &r__, &t);
    return 0;
 }
 doublereal dladiv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *r__,
                    doublereal *t)
 {
    doublereal ret_val;
    doublereal br;
    if (*r__ != 0.) {
        br = *b * *r__;
        if (br != 0.) {
            ret_val = (*a + br) * *t;
        } else {
            ret_val = *a * *t + *b * *t * *r__;
        }
    } else {
        ret_val = (*a + *d__ * (*b / *c__)) * *t;
    }
    return ret_val;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dladiv.f
+++ b/lib/linalg/dladiv.f
@ -1,251 +0,0 @@
 *> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLADIV + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLADIV( A, B, C, D, P, Q )
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   A, B, C, D, P, Q
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLADIV performs complex division in  real arithmetic
 *>
 *>                       a + i*b
 *>            p + i*q = ---------
 *>                       c + i*d
 *>
 *> The algorithm is due to Michael Baudin and Robert L. Smith
 *> and can be found in the paper
 *> "A Robust Complex Division in Scilab"
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION
 *> \endverbatim
 *>
 *> \param[in] B
 *> \verbatim
 *>          B is DOUBLE PRECISION
 *> \endverbatim
 *>
 *> \param[in] C
 *> \verbatim
 *>          C is DOUBLE PRECISION
 *> \endverbatim
 *>
 *> \param[in] D
 *> \verbatim
 *>          D is DOUBLE PRECISION
 *>          The scalars a, b, c, and d in the above expression.
 *> \endverbatim
 *>
 *> \param[out] P
 *> \verbatim
 *>          P is DOUBLE PRECISION
 *> \endverbatim
 *>
 *> \param[out] Q
 *> \verbatim
 *>          Q is DOUBLE PRECISION
 *>          The scalars p and q in the above expression.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
      SUBROUTINE DLADIV( A, B, C, D, P, Q )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   A, B, C, D, P, Q
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   BS
      PARAMETER          ( BS = 2.0D0 )
      DOUBLE PRECISION   HALF
      PARAMETER          ( HALF = 0.5D0 )
      DOUBLE PRECISION   TWO
      PARAMETER          ( TWO = 2.0D0 )
 *
 *     .. Local Scalars ..
      DOUBLE PRECISION   AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLADIV1
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX
 *     ..
 *     .. Executable Statements ..
 *
      AA = A
      BB = B
      CC = C
      DD = D
      AB = MAX( ABS(A), ABS(B) )
      CD = MAX( ABS(C), ABS(D) )
      S = 1.0D0
      OV = DLAMCH( 'Overflow threshold' )
      UN = DLAMCH( 'Safe minimum' )
      EPS = DLAMCH( 'Epsilon' )
      BE = BS / (EPS*EPS)
      IF( AB >= HALF*OV ) THEN
         AA = HALF * AA
         BB = HALF * BB
         S  = TWO * S
      END IF
      IF( CD >= HALF*OV ) THEN
         CC = HALF * CC
         DD = HALF * DD
         S  = HALF * S
      END IF
      IF( AB <= UN*BS/EPS ) THEN
         AA = AA * BE
         BB = BB * BE
         S  = S / BE
      END IF
      IF( CD <= UN*BS/EPS ) THEN
         CC = CC * BE
         DD = DD * BE
         S  = S * BE
      END IF
      IF( ABS( D ).LE.ABS( C ) ) THEN
         CALL DLADIV1(AA, BB, CC, DD, P, Q)
      ELSE
         CALL DLADIV1(BB, AA, DD, CC, P, Q)
         Q = -Q
      END IF
      P = P * S
      Q = Q * S
 *
      RETURN
 *
 *     End of DLADIV
 *
      END
 *> \ingroup doubleOTHERauxiliary
      SUBROUTINE DLADIV1( A, B, C, D, P, Q )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   A, B, C, D, P, Q
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D0 )
 *
 *     .. Local Scalars ..
      DOUBLE PRECISION   R, T
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLADIV2
      EXTERNAL           DLADIV2
 *     ..
 *     .. Executable Statements ..
 *
      R = D / C
      T = ONE / (C + D * R)
      P = DLADIV2(A, B, C, D, R, T)
      A = -A
      Q = DLADIV2(B, A, C, D, R, T)
 *
      RETURN
 *
 *     End of DLADIV1
 *
      END
 *> \ingroup doubleOTHERauxiliary
      DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   A, B, C, D, R, T
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO
      PARAMETER          ( ZERO = 0.0D0 )
 *
 *     .. Local Scalars ..
      DOUBLE PRECISION   BR
 *     ..
 *     .. Executable Statements ..
 *
      IF( R.NE.ZERO ) THEN
         BR = B * R
         IF( BR.NE.ZERO ) THEN
            DLADIV2 = (A + BR) * T
         ELSE
            DLADIV2 = A * T + (B * T) * R
         END IF
      ELSE
         DLADIV2 = (A + D * (B / C)) * T
      END IF
 *
      RETURN
 *
 *     End of DLADIV2
 *
      END
--- a/lib/linalg/dlae2.cpp
+++ b/lib/linalg/dlae2.cpp
@ -0,0 +1,45 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2)
 {
    doublereal d__1;
    double sqrt(doublereal);
    doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
    sm = *a + *c__;
    df = *a - *c__;
    adf = abs(df);
    tb = *b + *b;
    ab = abs(tb);
    if (abs(*a) > abs(*c__)) {
        acmx = *a;
        acmn = *c__;
    } else {
        acmx = *c__;
        acmn = *a;
    }
    if (adf > ab) {
        d__1 = ab / adf;
        rt = adf * sqrt(d__1 * d__1 + 1.);
    } else if (adf < ab) {
        d__1 = adf / ab;
        rt = ab * sqrt(d__1 * d__1 + 1.);
    } else {
        rt = ab * sqrt(2.);
    }
    if (sm < 0.) {
        *rt1 = (sm - rt) * .5;
        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else if (sm > 0.) {
        *rt1 = (sm + rt) * .5;
        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else {
        *rt1 = rt * .5;
        *rt2 = rt * -.5;
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlae2.f
+++ b/lib/linalg/dlae2.f
@ -1,182 +0,0 @@
 *> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAE2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
 *
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   A, B, C, RT1, RT2
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
 *>    [  A   B  ]
 *>    [  B   C  ].
 *> On return, RT1 is the eigenvalue of larger absolute value, and RT2
 *> is the eigenvalue of smaller absolute value.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION
 *>          The (1,1) element of the 2-by-2 matrix.
 *> \endverbatim
 *>
 *> \param[in] B
 *> \verbatim
 *>          B is DOUBLE PRECISION
 *>          The (1,2) and (2,1) elements of the 2-by-2 matrix.
 *> \endverbatim
 *>
 *> \param[in] C
 *> \verbatim
 *>          C is DOUBLE PRECISION
 *>          The (2,2) element of the 2-by-2 matrix.
 *> \endverbatim
 *>
 *> \param[out] RT1
 *> \verbatim
 *>          RT1 is DOUBLE PRECISION
 *>          The eigenvalue of larger absolute value.
 *> \endverbatim
 *>
 *> \param[out] RT2
 *> \verbatim
 *>          RT2 is DOUBLE PRECISION
 *>          The eigenvalue of smaller absolute value.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  RT1 is accurate to a few ulps barring over/underflow.
 *>
 *>  RT2 may be inaccurate if there is massive cancellation in the
 *>  determinant A*C-B*B; higher precision or correctly rounded or
 *>  correctly truncated arithmetic would be needed to compute RT2
 *>  accurately in all cases.
 *>
 *>  Overflow is possible only if RT1 is within a factor of 5 of overflow.
 *>  Underflow is harmless if the input data is 0 or exceeds
 *>     underflow_threshold / macheps.
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   A, B, C, RT1, RT2
 *     ..
 *
 * =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D0 )
      DOUBLE PRECISION   TWO
      PARAMETER          ( TWO = 2.0D0 )
      DOUBLE PRECISION   ZERO
      PARAMETER          ( ZERO = 0.0D0 )
      DOUBLE PRECISION   HALF
      PARAMETER          ( HALF = 0.5D0 )
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION   AB, ACMN, ACMX, ADF, DF, RT, SM, TB
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Compute the eigenvalues
 *
      SM = A + C
      DF = A - C
      ADF = ABS( DF )
      TB = B + B
      AB = ABS( TB )
      IF( ABS( A ).GT.ABS( C ) ) THEN
         ACMX = A
         ACMN = C
      ELSE
         ACMX = C
         ACMN = A
      END IF
      IF( ADF.GT.AB ) THEN
         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
      ELSE IF( ADF.LT.AB ) THEN
         RT = AB*SQRT( ONE+( ADF / AB )**2 )
      ELSE
 *
 *        Includes case AB=ADF=0
 *
         RT = AB*SQRT( TWO )
      END IF
      IF( SM.LT.ZERO ) THEN
         RT1 = HALF*( SM-RT )
 *
 *        Order of execution important.
 *        To get fully accurate smaller eigenvalue,
 *        next line needs to be executed in higher precision.
 *
         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
      ELSE IF( SM.GT.ZERO ) THEN
         RT1 = HALF*( SM+RT )
 *
 *        Order of execution important.
 *        To get fully accurate smaller eigenvalue,
 *        next line needs to be executed in higher precision.
 *
         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
      ELSE
 *
 *        Includes case RT1 = RT2 = 0
 *
         RT1 = HALF*RT
         RT2 = -HALF*RT
      END IF
      RETURN
 *
 *     End of DLAE2
 *
      END
--- a/lib/linalg/dlaed0.cpp
+++ b/lib/linalg/dlaed0.cpp
@ -0,0 +1,236 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__9 = 9;
 static integer c__0 = 0;
 static integer c__2 = 2;
 static doublereal c_b23 = 1.;
 static doublereal c_b24 = 0.;
 static integer c__1 = 1;
 int dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e,
            doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work,
            integer *iwork, integer *info)
 {
    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
    doublereal d__1;
    double log(doublereal);
    integer pow_lmp_ii(integer *, integer *);
    integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
    doublereal temp;
    integer curr;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen);
    integer iperm;
    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    integer indxq, iwrem;
    extern int dlaed1_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *,
                       integer *, doublereal *, integer *, integer *);
    integer iqptr;
    extern int dlaed7_(integer *, integer *, integer *, integer *, integer *, integer *,
                       doublereal *, doublereal *, integer *, integer *, doublereal *, integer *,
                       doublereal *, integer *, integer *, integer *, integer *, integer *,
                       doublereal *, doublereal *, integer *, integer *);
    integer tlvls;
    extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
                       integer *, ftnlen);
    integer igivcl;
    extern int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
                           ftnlen, ftnlen);
    integer igivnm, submat, curprb, subpbs, igivpt;
    extern int dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *,
                       doublereal *, integer *, ftnlen);
    integer curlvl, matsiz, iprmpt, smlsiz;
    --d__;
    --e;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    qstore_dim1 = *ldqs;
    qstore_offset = 1 + qstore_dim1;
    qstore -= qstore_offset;
    --work;
    --iwork;
    *info = 0;
    if (*icompq < 0 || *icompq > 2) {
        *info = -1;
    } else if (*icompq == 1 && *qsiz < max(0, *n)) {
        *info = -2;
    } else if (*n < 0) {
        *info = -3;
    } else if (*ldq < max(1, *n)) {
        *info = -7;
    } else if (*ldqs < max(1, *n)) {
        *info = -9;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DLAED0", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    smlsiz = ilaenv_(&c__9, (char *)"DLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
    iwork[1] = *n;
    subpbs = 1;
    tlvls = 0;
 L10:
    if (iwork[subpbs] > smlsiz) {
        for (j = subpbs; j >= 1; --j) {
            iwork[j * 2] = (iwork[j] + 1) / 2;
            iwork[(j << 1) - 1] = iwork[j] / 2;
        }
        ++tlvls;
        subpbs <<= 1;
        goto L10;
    }
    i__1 = subpbs;
    for (j = 2; j <= i__1; ++j) {
        iwork[j] += iwork[j - 1];
    }
    spm1 = subpbs - 1;
    i__1 = spm1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        submat = iwork[i__] + 1;
        smm1 = submat - 1;
        d__[smm1] -= (d__1 = e[smm1], abs(d__1));
        d__[submat] -= (d__1 = e[smm1], abs(d__1));
    }
    indxq = (*n << 2) + 3;
    if (*icompq != 2) {
        temp = log((doublereal)(*n)) / log(2.);
        lgn = (integer)temp;
        if (pow_lmp_ii(&c__2, &lgn) < *n) {
            ++lgn;
        }
        if (pow_lmp_ii(&c__2, &lgn) < *n) {
            ++lgn;
        }
        iprmpt = indxq + *n + 1;
        iperm = iprmpt + *n * lgn;
        iqptr = iperm + *n * lgn;
        igivpt = iqptr + *n + 2;
        igivcl = igivpt + *n * lgn;
        igivnm = 1;
        iq = igivnm + (*n << 1) * lgn;
        i__1 = *n;
        iwrem = iq + i__1 * i__1 + 1;
        i__1 = subpbs;
        for (i__ = 0; i__ <= i__1; ++i__) {
            iwork[iprmpt + i__] = 1;
            iwork[igivpt + i__] = 1;
        }
        iwork[iqptr] = 1;
    }
    curr = 0;
    i__1 = spm1;
    for (i__ = 0; i__ <= i__1; ++i__) {
        if (i__ == 0) {
            submat = 1;
            matsiz = iwork[1];
        } else {
            submat = iwork[i__] + 1;
            matsiz = iwork[i__ + 1] - iwork[i__];
        }
        if (*icompq == 2) {
            dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq,
                    &work[1], info, (ftnlen)1);
            if (*info != 0) {
                goto L130;
            }
        } else {
            dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]],
                    &matsiz, &work[1], info, (ftnlen)1);
            if (*info != 0) {
                goto L130;
            }
            if (*icompq == 1) {
                dgemm_((char *)"N", (char *)"N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * q_dim1 + 1], ldq,
                       &work[iq - 1 + iwork[iqptr + curr]], &matsiz, &c_b24,
                       &qstore[submat * qstore_dim1 + 1], ldqs, (ftnlen)1, (ftnlen)1);
            }
            i__2 = matsiz;
            iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
            ++curr;
        }
        k = 1;
        i__2 = iwork[i__ + 1];
        for (j = submat; j <= i__2; ++j) {
            iwork[indxq + j] = k;
            ++k;
        }
    }
    curlvl = 1;
 L80:
    if (subpbs > 1) {
        spm2 = subpbs - 2;
        i__1 = spm2;
        for (i__ = 0; i__ <= i__1; i__ += 2) {
            if (i__ == 0) {
                submat = 1;
                matsiz = iwork[2];
                msd2 = iwork[1];
                curprb = 0;
            } else {
                submat = iwork[i__] + 1;
                matsiz = iwork[i__ + 2] - iwork[i__];
                msd2 = matsiz / 2;
                ++curprb;
            }
            if (*icompq == 2) {
                dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq,
                        &iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &work[1],
                        &iwork[subpbs + 1], info);
            } else {
                dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[submat],
                        &qstore[submat * qstore_dim1 + 1], ldqs, &iwork[indxq + submat],
                        &e[submat + msd2 - 1], &msd2, &work[iq], &iwork[iqptr], &iwork[iprmpt],
                        &iwork[iperm], &iwork[igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem],
                        &iwork[subpbs + 1], info);
            }
            if (*info != 0) {
                goto L130;
            }
            iwork[i__ / 2 + 1] = iwork[i__ + 2];
        }
        subpbs /= 2;
        ++curlvl;
        goto L80;
    }
    if (*icompq == 1) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
            dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1);
        }
        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
    } else if (*icompq == 2) {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
            dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
        }
        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
        dlacpy_((char *)"A", n, n, &work[*n + 1], n, &q[q_offset], ldq, (ftnlen)1);
    } else {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            j = iwork[indxq + i__];
            work[i__] = d__[j];
        }
        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
    }
    goto L140;
 L130:
    *info = submat * (*n + 1) + submat + matsiz - 1;
 L140:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed0.f
+++ b/lib/linalg/dlaed0.f
@ -1,431 +0,0 @@
 *> \brief \b DLAED0 used by DSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED0 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
 *                          WORK, IWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
 *      $                   WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAED0 computes all eigenvalues and corresponding eigenvectors of a
 *> symmetric tridiagonal matrix using the divide and conquer method.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] ICOMPQ
 *> \verbatim
 *>          ICOMPQ is INTEGER
 *>          = 0:  Compute eigenvalues only.
 *>          = 1:  Compute eigenvectors of original dense symmetric matrix
 *>                also.  On entry, Q contains the orthogonal matrix used
 *>                to reduce the original matrix to tridiagonal form.
 *>          = 2:  Compute eigenvalues and eigenvectors of tridiagonal
 *>                matrix.
 *> \endverbatim
 *>
 *> \param[in] QSIZ
 *> \verbatim
 *>          QSIZ is INTEGER
 *>         The dimension of the orthogonal matrix used to reduce
 *>         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>         On entry, the main diagonal of the tridiagonal matrix.
 *>         On exit, its eigenvalues.
 *> \endverbatim
 *>
 *> \param[in] E
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (N-1)
 *>         The off-diagonal elements of the tridiagonal matrix.
 *>         On exit, E has been destroyed.
 *> \endverbatim
 *>
 *> \param[in,out] Q
 *> \verbatim
 *>          Q is DOUBLE PRECISION array, dimension (LDQ, N)
 *>         On entry, Q must contain an N-by-N orthogonal matrix.
 *>         If ICOMPQ = 0    Q is not referenced.
 *>         If ICOMPQ = 1    On entry, Q is a subset of the columns of the
 *>                          orthogonal matrix used to reduce the full
 *>                          matrix to tridiagonal form corresponding to
 *>                          the subset of the full matrix which is being
 *>                          decomposed at this time.
 *>         If ICOMPQ = 2    On entry, Q will be the identity matrix.
 *>                          On exit, Q contains the eigenvectors of the
 *>                          tridiagonal matrix.
 *> \endverbatim
 *>
 *> \param[in] LDQ
 *> \verbatim
 *>          LDQ is INTEGER
 *>         The leading dimension of the array Q.  If eigenvectors are
 *>         desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
 *> \endverbatim
 *>
 *> \param[out] QSTORE
 *> \verbatim
 *>          QSTORE is DOUBLE PRECISION array, dimension (LDQS, N)
 *>         Referenced only when ICOMPQ = 1.  Used to store parts of
 *>         the eigenvector matrix when the updating matrix multiplies
 *>         take place.
 *> \endverbatim
 *>
 *> \param[in] LDQS
 *> \verbatim
 *>          LDQS is INTEGER
 *>         The leading dimension of the array QSTORE.  If ICOMPQ = 1,
 *>         then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array,
 *>         If ICOMPQ = 0 or 1, the dimension of WORK must be at least
 *>                     1 + 3*N + 2*N*lg N + 3*N**2
 *>                     ( lg( N ) = smallest integer k
 *>                                 such that 2^k >= N )
 *>         If ICOMPQ = 2, the dimension of WORK must be at least
 *>                     4*N + N**2.
 *> \endverbatim
 *>
 *> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array,
 *>         If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
 *>                        6 + 6*N + 5*N*lg N.
 *>                        ( lg( N ) = smallest integer k
 *>                                    such that 2^k >= N )
 *>         If ICOMPQ = 2, the dimension of IWORK must be at least
 *>                        3 + 5*N.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit.
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *>          > 0:  The algorithm failed to compute an eigenvalue while
 *>                working on the submatrix lying in rows and columns
 *>                INFO/(N+1) through mod(INFO,N+1).
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> Jeff Rutter, Computer Science Division, University of California
 *> at Berkeley, USA
 *
 *  =====================================================================
      SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
     $                   WORK, IWORK, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
 *     ..
 *     .. Array Arguments ..
      INTEGER            IWORK( * )
      DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
     $                   WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE, TWO
      PARAMETER          ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
     $                   J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
     $                   SPM2, SUBMAT, SUBPBS, TLVLS
      DOUBLE PRECISION   TEMP
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR,
     $                   XERBLA
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, INT, LOG, MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
 *
      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
         INFO = -1
      ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
         INFO = -2
      ELSE IF( N.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
         INFO = -7
      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
         INFO = -9
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAED0', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 )
 *
 *     Determine the size and placement of the submatrices, and save in
 *     the leading elements of IWORK.
 *
      IWORK( 1 ) = N
      SUBPBS = 1
      TLVLS = 0
   10 CONTINUE
      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
         DO 20 J = SUBPBS, 1, -1
            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
            IWORK( 2*J-1 ) = IWORK( J ) / 2
   20    CONTINUE
         TLVLS = TLVLS + 1
         SUBPBS = 2*SUBPBS
         GO TO 10
      END IF
      DO 30 J = 2, SUBPBS
         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
   30 CONTINUE
 *
 *     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
 *     using rank-1 modifications (cuts).
 *
      SPM1 = SUBPBS - 1
      DO 40 I = 1, SPM1
         SUBMAT = IWORK( I ) + 1
         SMM1 = SUBMAT - 1
         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
   40 CONTINUE
 *
      INDXQ = 4*N + 3
      IF( ICOMPQ.NE.2 ) THEN
 *
 *        Set up workspaces for eigenvalues only/accumulate new vectors
 *        routine
 *
         TEMP = LOG( DBLE( N ) ) / LOG( TWO )
         LGN = INT( TEMP )
         IF( 2**LGN.LT.N )
     $      LGN = LGN + 1
         IF( 2**LGN.LT.N )
     $      LGN = LGN + 1
         IPRMPT = INDXQ + N + 1
         IPERM = IPRMPT + N*LGN
         IQPTR = IPERM + N*LGN
         IGIVPT = IQPTR + N + 2
         IGIVCL = IGIVPT + N*LGN
 *
         IGIVNM = 1
         IQ = IGIVNM + 2*N*LGN
         IWREM = IQ + N**2 + 1
 *
 *        Initialize pointers
 *
         DO 50 I = 0, SUBPBS
            IWORK( IPRMPT+I ) = 1
            IWORK( IGIVPT+I ) = 1
   50    CONTINUE
         IWORK( IQPTR ) = 1
      END IF
 *
 *     Solve each submatrix eigenproblem at the bottom of the divide and
 *     conquer tree.
 *
      CURR = 0
      DO 70 I = 0, SPM1
         IF( I.EQ.0 ) THEN
            SUBMAT = 1
            MATSIZ = IWORK( 1 )
         ELSE
            SUBMAT = IWORK( I ) + 1
            MATSIZ = IWORK( I+1 ) - IWORK( I )
         END IF
         IF( ICOMPQ.EQ.2 ) THEN
            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
     $                   Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
            IF( INFO.NE.0 )
     $         GO TO 130
         ELSE
            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
     $                   WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
     $                   INFO )
            IF( INFO.NE.0 )
     $         GO TO 130
            IF( ICOMPQ.EQ.1 ) THEN
               CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
     $                     Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
     $                     CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
     $                     LDQS )
            END IF
            IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
            CURR = CURR + 1
         END IF
         K = 1
         DO 60 J = SUBMAT, IWORK( I+1 )
            IWORK( INDXQ+J ) = K
            K = K + 1
   60    CONTINUE
   70 CONTINUE
 *
 *     Successively merge eigensystems of adjacent submatrices
 *     into eigensystem for the corresponding larger matrix.
 *
 *     while ( SUBPBS > 1 )
 *
      CURLVL = 1
   80 CONTINUE
      IF( SUBPBS.GT.1 ) THEN
         SPM2 = SUBPBS - 2
         DO 90 I = 0, SPM2, 2
            IF( I.EQ.0 ) THEN
               SUBMAT = 1
               MATSIZ = IWORK( 2 )
               MSD2 = IWORK( 1 )
               CURPRB = 0
            ELSE
               SUBMAT = IWORK( I ) + 1
               MATSIZ = IWORK( I+2 ) - IWORK( I )
               MSD2 = MATSIZ / 2
               CURPRB = CURPRB + 1
            END IF
 *
 *     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
 *     into an eigensystem of size MATSIZ.
 *     DLAED1 is used only for the full eigensystem of a tridiagonal
 *     matrix.
 *     DLAED7 handles the cases in which eigenvalues only or eigenvalues
 *     and eigenvectors of a full symmetric matrix (which was reduced to
 *     tridiagonal form) are desired.
 *
            IF( ICOMPQ.EQ.2 ) THEN
               CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
     $                      LDQ, IWORK( INDXQ+SUBMAT ),
     $                      E( SUBMAT+MSD2-1 ), MSD2, WORK,
     $                      IWORK( SUBPBS+1 ), INFO )
            ELSE
               CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
     $                      D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
     $                      IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
     $                      MSD2, WORK( IQ ), IWORK( IQPTR ),
     $                      IWORK( IPRMPT ), IWORK( IPERM ),
     $                      IWORK( IGIVPT ), IWORK( IGIVCL ),
     $                      WORK( IGIVNM ), WORK( IWREM ),
     $                      IWORK( SUBPBS+1 ), INFO )
            END IF
            IF( INFO.NE.0 )
     $         GO TO 130
            IWORK( I / 2+1 ) = IWORK( I+2 )
   90    CONTINUE
         SUBPBS = SUBPBS / 2
         CURLVL = CURLVL + 1
         GO TO 80
      END IF
 *
 *     end while
 *
 *     Re-merge the eigenvalues/vectors which were deflated at the final
 *     merge step.
 *
      IF( ICOMPQ.EQ.1 ) THEN
         DO 100 I = 1, N
            J = IWORK( INDXQ+I )
            WORK( I ) = D( J )
            CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
  100    CONTINUE
         CALL DCOPY( N, WORK, 1, D, 1 )
      ELSE IF( ICOMPQ.EQ.2 ) THEN
         DO 110 I = 1, N
            J = IWORK( INDXQ+I )
            WORK( I ) = D( J )
            CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
  110    CONTINUE
         CALL DCOPY( N, WORK, 1, D, 1 )
         CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
      ELSE
         DO 120 I = 1, N
            J = IWORK( INDXQ+I )
            WORK( I ) = D( J )
  120    CONTINUE
         CALL DCOPY( N, WORK, 1, D, 1 )
      END IF
      GO TO 140
 *
  130 CONTINUE
      INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
 *
  140 CONTINUE
      RETURN
 *
 *     End of DLAED0
 *
      END
--- a/lib/linalg/dlaed1.cpp
+++ b/lib/linalg/dlaed1.cpp
@ -0,0 +1,90 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static integer c_n1 = -1;
 int dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
            doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info)
 {
    integer q_dim1, q_offset, i__1, i__2;
    integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc;
    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    integer indxp;
    extern int dlaed2_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
                       integer *, doublereal *, doublereal *, doublereal *, doublereal *,
                       doublereal *, integer *, integer *, integer *, integer *, integer *),
        dlaed3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
                doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *,
                doublereal *, integer *);
    integer idlmda;
    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
        xerbla_(char *, integer *, ftnlen);
    integer coltyp;
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --work;
    --iwork;
    *info = 0;
    if (*n < 0) {
        *info = -1;
    } else if (*ldq < max(1, *n)) {
        *info = -4;
    } else {
        i__1 = 1, i__2 = *n / 2;
        if (min(i__1, i__2) > *cutpnt || *n / 2 < *cutpnt) {
            *info = -7;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DLAED1", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq2 = iw + *n;
    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;
    dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
    zpp1 = *cutpnt + 1;
    i__1 = *n - *cutpnt;
    dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
    dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[iz], &work[idlmda],
            &work[iw], &work[iq2], &iwork[indx], &iwork[indxc], &iwork[indxp], &iwork[coltyp],
            info);
    if (*info != 0) {
        goto L20;
    }
    if (k != 0) {
        is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt +
             (iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
        dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2],
                &iwork[indxc], &iwork[coltyp], &work[iw], &work[is], info);
        if (*info != 0) {
            goto L20;
        }
        n1 = k;
        n2 = *n - k;
        dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            indxq[i__] = i__;
        }
    }
 L20:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed1.f
+++ b/lib/linalg/dlaed1.f
@ -1,271 +0,0 @@
 *> \brief \b DLAED1 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED1 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
 *                          INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            CUTPNT, INFO, LDQ, N
 *       DOUBLE PRECISION   RHO
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            INDXQ( * ), IWORK( * )
 *       DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAED1 computes the updated eigensystem of a diagonal
 *> matrix after modification by a rank-one symmetric matrix.  This
 *> routine is used only for the eigenproblem which requires all
 *> eigenvalues and eigenvectors of a tridiagonal matrix.  DLAED7 handles
 *> the case in which eigenvalues only or eigenvalues and eigenvectors
 *> of a full symmetric matrix (which was reduced to tridiagonal form)
 *> are desired.
 *>
 *>   T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out)
 *>
 *>    where Z = Q**T*u, u is a vector of length N with ones in the
 *>    CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
 *>
 *>    The eigenvectors of the original matrix are stored in Q, and the
 *>    eigenvalues are in D.  The algorithm consists of three stages:
 *>
 *>       The first stage consists of deflating the size of the problem
 *>       when there are multiple eigenvalues or if there is a zero in
 *>       the Z vector.  For each such occurrence the dimension of the
 *>       secular equation problem is reduced by one.  This stage is
 *>       performed by the routine DLAED2.
 *>
 *>       The second stage consists of calculating the updated
 *>       eigenvalues. This is done by finding the roots of the secular
 *>       equation via the routine DLAED4 (as called by DLAED3).
 *>       This routine also calculates the eigenvectors of the current
 *>       problem.
 *>
 *>       The final stage consists of computing the updated eigenvectors
 *>       directly using the updated eigenvalues.  The eigenvectors for
 *>       the current problem are multiplied with the eigenvectors from
 *>       the overall problem.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>         On entry, the eigenvalues of the rank-1-perturbed matrix.
 *>         On exit, the eigenvalues of the repaired matrix.
 *> \endverbatim
 *>
 *> \param[in,out] Q
 *> \verbatim
 *>          Q is DOUBLE PRECISION array, dimension (LDQ,N)
 *>         On entry, the eigenvectors of the rank-1-perturbed matrix.
 *>         On exit, the eigenvectors of the repaired tridiagonal matrix.
 *> \endverbatim
 *>
 *> \param[in] LDQ
 *> \verbatim
 *>          LDQ is INTEGER
 *>         The leading dimension of the array Q.  LDQ >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in,out] INDXQ
 *> \verbatim
 *>          INDXQ is INTEGER array, dimension (N)
 *>         On entry, the permutation which separately sorts the two
 *>         subproblems in D into ascending order.
 *>         On exit, the permutation which will reintegrate the
 *>         subproblems back into sorted order,
 *>         i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
 *> \endverbatim
 *>
 *> \param[in] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>         The subdiagonal entry used to create the rank-1 modification.
 *> \endverbatim
 *>
 *> \param[in] CUTPNT
 *> \verbatim
 *>          CUTPNT is INTEGER
 *>         The location of the last eigenvalue in the leading sub-matrix.
 *>         min(1,N) <= CUTPNT <= N/2.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (4*N + N**2)
 *> \endverbatim
 *>
 *> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (4*N)
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit.
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *>          > 0:  if INFO = 1, an eigenvalue did not converge
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> Jeff Rutter, Computer Science Division, University of California
 *> at Berkeley, USA \n
 *>  Modified by Francoise Tisseur, University of Tennessee
 *>
 *  =====================================================================
      SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
     $                   INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            CUTPNT, INFO, LDQ, N
      DOUBLE PRECISION   RHO
 *     ..
 *     .. Array Arguments ..
      INTEGER            INDXQ( * ), IWORK( * )
      DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
      INTEGER            COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS,
     $                   IW, IZ, K, N1, N2, ZPP1
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
 *
      IF( N.LT.0 ) THEN
         INFO = -1
      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
         INFO = -4
      ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
         INFO = -7
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAED1', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
 *     The following values are integer pointers which indicate
 *     the portion of the workspace
 *     used by a particular array in DLAED2 and DLAED3.
 *
      IZ = 1
      IDLMDA = IZ + N
      IW = IDLMDA + N
      IQ2 = IW + N
 *
      INDX = 1
      INDXC = INDX + N
      COLTYP = INDXC + N
      INDXP = COLTYP + N
 *
 *
 *     Form the z-vector which consists of the last row of Q_1 and the
 *     first row of Q_2.
 *
      CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
      ZPP1 = CUTPNT + 1
      CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
 *
 *     Deflate eigenvalues.
 *
      CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
     $             WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
     $             IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
     $             IWORK( COLTYP ), INFO )
 *
      IF( INFO.NE.0 )
     $   GO TO 20
 *
 *     Solve Secular Equation.
 *
      IF( K.NE.0 ) THEN
         IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
     $        ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
         CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
     $                WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
     $                WORK( IW ), WORK( IS ), INFO )
         IF( INFO.NE.0 )
     $      GO TO 20
 *
 *     Prepare the INDXQ sorting permutation.
 *
         N1 = K
         N2 = N - K
         CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
      ELSE
         DO 10 I = 1, N
            INDXQ( I ) = I
   10    CONTINUE
      END IF
 *
   20 CONTINUE
      RETURN
 *
 *     End of DLAED1
 *
      END
--- a/lib/linalg/dlaed2.cpp
+++ b/lib/linalg/dlaed2.cpp
@ -0,0 +1,263 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static doublereal c_b3 = -1.;
 static integer c__1 = 1;
 int dlaed2_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
            integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w,
            doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp,
            integer *info)
 {
    integer q_dim1, q_offset, i__1, i__2;
    doublereal d__1, d__2, d__3, d__4;
    double sqrt(doublereal);
    doublereal c__;
    integer i__, j;
    doublereal s, t;
    integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
    doublereal eps, tau, tol;
    integer psm[4], imax, jmax;
    extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
                     doublereal *);
    integer ctot[4];
    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
        dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
                ftnlen),
        xerbla_(char *, integer *, ftnlen);
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --z__;
    --dlamda;
    --w;
    --q2;
    --indx;
    --indxc;
    --indxp;
    --coltyp;
    *info = 0;
    if (*n < 0) {
        *info = -2;
    } else if (*ldq < max(1, *n)) {
        *info = -6;
    } else {
        i__1 = 1, i__2 = *n / 2;
        if (min(i__1, i__2) > *n1 || *n / 2 < *n1) {
            *info = -3;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DLAED2", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    n2 = *n - *n1;
    n1p1 = *n1 + 1;
    if (*rho < 0.) {
        dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }
    t = 1. / sqrt(2.);
    dscal_(n, &t, &z__[1], &c__1);
    *rho = (d__1 = *rho * 2., abs(d__1));
    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        indxq[i__] += *n1;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = d__[indxq[i__]];
    }
    dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        indx[i__] = indxq[indxc[i__]];
    }
    imax = idamax_(n, &z__[1], &c__1);
    jmax = idamax_(n, &d__[1], &c__1);
    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
    d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2));
    tol = eps * 8. * max(d__3, d__4);
    if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
        *k = 0;
        iq2 = 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__ = indx[j];
            dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
            dlamda[j] = d__[i__];
            iq2 += *n;
        }
        dlacpy_((char *)"A", n, n, &q2[1], n, &q[q_offset], ldq, (ftnlen)1);
        dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
        goto L190;
    }
    i__1 = *n1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        coltyp[i__] = 1;
    }
    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        coltyp[i__] = 3;
    }
    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        nj = indx[j];
        if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
            --k2;
            coltyp[nj] = 4;
            indxp[k2] = nj;
            if (j == *n) {
                goto L100;
            }
        } else {
            pj = nj;
            goto L80;
        }
    }
 L80:
    ++j;
    nj = indx[j];
    if (j > *n) {
        goto L100;
    }
    if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
        --k2;
        coltyp[nj] = 4;
        indxp[k2] = nj;
    } else {
        s = z__[pj];
        c__ = z__[nj];
        tau = dlapy2_(&c__, &s);
        t = d__[nj] - d__[pj];
        c__ /= tau;
        s = -s / tau;
        if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
            z__[nj] = tau;
            z__[pj] = 0.;
            if (coltyp[nj] != coltyp[pj]) {
                coltyp[nj] = 2;
            }
            coltyp[pj] = 4;
            drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &c__, &s);
            d__1 = c__;
            d__2 = s;
            t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
            d__1 = s;
            d__2 = c__;
            d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
            d__[pj] = t;
            --k2;
            i__ = 1;
        L90:
            if (k2 + i__ <= *n) {
                if (d__[pj] < d__[indxp[k2 + i__]]) {
                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
                    indxp[k2 + i__] = pj;
                    ++i__;
                    goto L90;
                } else {
                    indxp[k2 + i__ - 1] = pj;
                }
            } else {
                indxp[k2 + i__ - 1] = pj;
            }
            pj = nj;
        } else {
            ++(*k);
            dlamda[*k] = d__[pj];
            w[*k] = z__[pj];
            indxp[*k] = pj;
            pj = nj;
        }
    }
    goto L80;
 L100:
    ++(*k);
    dlamda[*k] = d__[pj];
    w[*k] = z__[pj];
    indxp[*k] = pj;
    for (j = 1; j <= 4; ++j) {
        ctot[j - 1] = 0;
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        ct = coltyp[j];
        ++ctot[ct - 1];
    }
    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    *k = *n - ctot[3];
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        js = indxp[j];
        ct = coltyp[js];
        indx[psm[ct - 1]] = js;
        indxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
    }
    i__ = 1;
    iq1 = 1;
    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
    i__1 = ctot[0];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
    }
    i__1 = ctot[1];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        iq2 += n2;
    }
    i__1 = ctot[2];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq2 += n2;
    }
    iq1 = iq2;
    i__1 = ctot[3];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
        iq2 += *n;
        z__[i__] = d__[js];
        ++i__;
    }
    if (*k < *n) {
        dlacpy_((char *)"A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq, (ftnlen)1);
        i__1 = *n - *k;
        dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
    }
    for (j = 1; j <= 4; ++j) {
        coltyp[j] = ctot[j - 1];
    }
 L190:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed2.f
+++ b/lib/linalg/dlaed2.f
@ -1,536 +0,0 @@
 *> \brief \b DLAED2 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED2 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
 *                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDQ, N, N1
 *       DOUBLE PRECISION   RHO
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
 *      $                   INDXQ( * )
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 *      $                   W( * ), Z( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAED2 merges the two sets of eigenvalues together into a single
 *> sorted set.  Then it tries to deflate the size of the problem.
 *> There are two ways in which deflation can occur:  when two or more
 *> eigenvalues are close together or if there is a tiny entry in the
 *> Z vector.  For each such occurrence the order of the related secular
 *> equation problem is reduced by one.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[out] K
 *> \verbatim
 *>          K is INTEGER
 *>         The number of non-deflated eigenvalues, and the order of the
 *>         related secular equation. 0 <= K <=N.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] N1
 *> \verbatim
 *>          N1 is INTEGER
 *>         The location of the last eigenvalue in the leading sub-matrix.
 *>         min(1,N) <= N1 <= N/2.
 *> \endverbatim
 *>
 *> \param[in,out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>         On entry, D contains the eigenvalues of the two submatrices to
 *>         be combined.
 *>         On exit, D contains the trailing (N-K) updated eigenvalues
 *>         (those which were deflated) sorted into increasing order.
 *> \endverbatim
 *>
 *> \param[in,out] Q
 *> \verbatim
 *>          Q is DOUBLE PRECISION array, dimension (LDQ, N)
 *>         On entry, Q contains the eigenvectors of two submatrices in
 *>         the two square blocks with corners at (1,1), (N1,N1)
 *>         and (N1+1, N1+1), (N,N).
 *>         On exit, Q contains the trailing (N-K) updated eigenvectors
 *>         (those which were deflated) in its last N-K columns.
 *> \endverbatim
 *>
 *> \param[in] LDQ
 *> \verbatim
 *>          LDQ is INTEGER
 *>         The leading dimension of the array Q.  LDQ >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in,out] INDXQ
 *> \verbatim
 *>          INDXQ is INTEGER array, dimension (N)
 *>         The permutation which separately sorts the two sub-problems
 *>         in D into ascending order.  Note that elements in the second
 *>         half of this permutation must first have N1 added to their
 *>         values. Destroyed on exit.
 *> \endverbatim
 *>
 *> \param[in,out] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>         On entry, the off-diagonal element associated with the rank-1
 *>         cut which originally split the two submatrices which are now
 *>         being recombined.
 *>         On exit, RHO has been modified to the value required by
 *>         DLAED3.
 *> \endverbatim
 *>
 *> \param[in] Z
 *> \verbatim
 *>          Z is DOUBLE PRECISION array, dimension (N)
 *>         On entry, Z contains the updating vector (the last
 *>         row of the first sub-eigenvector matrix and the first row of
 *>         the second sub-eigenvector matrix).
 *>         On exit, the contents of Z have been destroyed by the updating
 *>         process.
 *> \endverbatim
 *>
 *> \param[out] DLAMDA
 *> \verbatim
 *>          DLAMDA is DOUBLE PRECISION array, dimension (N)
 *>         A copy of the first K eigenvalues which will be used by
 *>         DLAED3 to form the secular equation.
 *> \endverbatim
 *>
 *> \param[out] W
 *> \verbatim
 *>          W is DOUBLE PRECISION array, dimension (N)
 *>         The first k values of the final deflation-altered z-vector
 *>         which will be passed to DLAED3.
 *> \endverbatim
 *>
 *> \param[out] Q2
 *> \verbatim
 *>          Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
 *>         A copy of the first K eigenvectors which will be used by
 *>         DLAED3 in a matrix multiply (DGEMM) to solve for the new
 *>         eigenvectors.
 *> \endverbatim
 *>
 *> \param[out] INDX
 *> \verbatim
 *>          INDX is INTEGER array, dimension (N)
 *>         The permutation used to sort the contents of DLAMDA into
 *>         ascending order.
 *> \endverbatim
 *>
 *> \param[out] INDXC
 *> \verbatim
 *>          INDXC is INTEGER array, dimension (N)
 *>         The permutation used to arrange the columns of the deflated
 *>         Q matrix into three groups:  the first group contains non-zero
 *>         elements only at and above N1, the second contains
 *>         non-zero elements only below N1, and the third is dense.
 *> \endverbatim
 *>
 *> \param[out] INDXP
 *> \verbatim
 *>          INDXP is INTEGER array, dimension (N)
 *>         The permutation used to place deflated values of D at the end
 *>         of the array.  INDXP(1:K) points to the nondeflated D-values
 *>         and INDXP(K+1:N) points to the deflated eigenvalues.
 *> \endverbatim
 *>
 *> \param[out] COLTYP
 *> \verbatim
 *>          COLTYP is INTEGER array, dimension (N)
 *>         During execution, a label which will indicate which of the
 *>         following types a column in the Q2 matrix is:
 *>         1 : non-zero in the upper half only;
 *>         2 : dense;
 *>         3 : non-zero in the lower half only;
 *>         4 : deflated.
 *>         On exit, COLTYP(i) is the number of columns of type i,
 *>         for i=1 to 4 only.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit.
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> Jeff Rutter, Computer Science Division, University of California
 *> at Berkeley, USA \n
 *>  Modified by Francoise Tisseur, University of Tennessee
 *>
 *  =====================================================================
      SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, K, LDQ, N, N1
      DOUBLE PRECISION   RHO
 *     ..
 *     .. Array Arguments ..
      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
     $                   INDXQ( * )
      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
     $                   W( * ), Z( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
 *     ..
 *     .. Local Arrays ..
      INTEGER            CTOT( 4 ), PSM( 4 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
     $                   N2, NJ, PJ
      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
 *     ..
 *     .. External Functions ..
      INTEGER            IDAMAX
      DOUBLE PRECISION   DLAMCH, DLAPY2
      EXTERNAL           IDAMAX, DLAMCH, DLAPY2
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
 *
      IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
         INFO = -6
      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
         INFO = -3
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAED2', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      N2 = N - N1
      N1P1 = N1 + 1
 *
      IF( RHO.LT.ZERO ) THEN
         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
      END IF
 *
 *     Normalize z so that norm(z) = 1.  Since z is the concatenation of
 *     two normalized vectors, norm2(z) = sqrt(2).
 *
      T = ONE / SQRT( TWO )
      CALL DSCAL( N, T, Z, 1 )
 *
 *     RHO = ABS( norm(z)**2 * RHO )
 *
      RHO = ABS( TWO*RHO )
 *
 *     Sort the eigenvalues into increasing order
 *
      DO 10 I = N1P1, N
         INDXQ( I ) = INDXQ( I ) + N1
   10 CONTINUE
 *
 *     re-integrate the deflated parts from the last pass
 *
      DO 20 I = 1, N
         DLAMDA( I ) = D( INDXQ( I ) )
   20 CONTINUE
      CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
      DO 30 I = 1, N
         INDX( I ) = INDXQ( INDXC( I ) )
   30 CONTINUE
 *
 *     Calculate the allowable deflation tolerance
 *
      IMAX = IDAMAX( N, Z, 1 )
      JMAX = IDAMAX( N, D, 1 )
      EPS = DLAMCH( 'Epsilon' )
      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
 *
 *     If the rank-1 modifier is small enough, no more needs to be done
 *     except to reorganize Q so that its columns correspond with the
 *     elements in D.
 *
      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
         K = 0
         IQ2 = 1
         DO 40 J = 1, N
            I = INDX( J )
            CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
            DLAMDA( J ) = D( I )
            IQ2 = IQ2 + N
   40    CONTINUE
         CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
         CALL DCOPY( N, DLAMDA, 1, D, 1 )
         GO TO 190
      END IF
 *
 *     If there are multiple eigenvalues then the problem deflates.  Here
 *     the number of equal eigenvalues are found.  As each equal
 *     eigenvalue is found, an elementary reflector is computed to rotate
 *     the corresponding eigensubspace so that the corresponding
 *     components of Z are zero in this new basis.
 *
      DO 50 I = 1, N1
         COLTYP( I ) = 1
   50 CONTINUE
      DO 60 I = N1P1, N
         COLTYP( I ) = 3
   60 CONTINUE
 *
 *
      K = 0
      K2 = N + 1
      DO 70 J = 1, N
         NJ = INDX( J )
         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
 *
 *           Deflate due to small z component.
 *
            K2 = K2 - 1
            COLTYP( NJ ) = 4
            INDXP( K2 ) = NJ
            IF( J.EQ.N )
     $         GO TO 100
         ELSE
            PJ = NJ
            GO TO 80
         END IF
   70 CONTINUE
   80 CONTINUE
      J = J + 1
      NJ = INDX( J )
      IF( J.GT.N )
     $   GO TO 100
      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
 *
 *        Deflate due to small z component.
 *
         K2 = K2 - 1
         COLTYP( NJ ) = 4
         INDXP( K2 ) = NJ
      ELSE
 *
 *        Check if eigenvalues are close enough to allow deflation.
 *
         S = Z( PJ )
         C = Z( NJ )
 *
 *        Find sqrt(a**2+b**2) without overflow or
 *        destructive underflow.
 *
         TAU = DLAPY2( C, S )
         T = D( NJ ) - D( PJ )
         C = C / TAU
         S = -S / TAU
         IF( ABS( T*C*S ).LE.TOL ) THEN
 *
 *           Deflation is possible.
 *
            Z( NJ ) = TAU
            Z( PJ ) = ZERO
            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
     $         COLTYP( NJ ) = 2
            COLTYP( PJ ) = 4
            CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
            T = D( PJ )*C**2 + D( NJ )*S**2
            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
            D( PJ ) = T
            K2 = K2 - 1
            I = 1
   90       CONTINUE
            IF( K2+I.LE.N ) THEN
               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
                  INDXP( K2+I-1 ) = INDXP( K2+I )
                  INDXP( K2+I ) = PJ
                  I = I + 1
                  GO TO 90
               ELSE
                  INDXP( K2+I-1 ) = PJ
               END IF
            ELSE
               INDXP( K2+I-1 ) = PJ
            END IF
            PJ = NJ
         ELSE
            K = K + 1
            DLAMDA( K ) = D( PJ )
            W( K ) = Z( PJ )
            INDXP( K ) = PJ
            PJ = NJ
         END IF
      END IF
      GO TO 80
  100 CONTINUE
 *
 *     Record the last eigenvalue.
 *
      K = K + 1
      DLAMDA( K ) = D( PJ )
      W( K ) = Z( PJ )
      INDXP( K ) = PJ
 *
 *     Count up the total number of the various types of columns, then
 *     form a permutation which positions the four column types into
 *     four uniform groups (although one or more of these groups may be
 *     empty).
 *
      DO 110 J = 1, 4
         CTOT( J ) = 0
  110 CONTINUE
      DO 120 J = 1, N
         CT = COLTYP( J )
         CTOT( CT ) = CTOT( CT ) + 1
  120 CONTINUE
 *
 *     PSM(*) = Position in SubMatrix (of types 1 through 4)
 *
      PSM( 1 ) = 1
      PSM( 2 ) = 1 + CTOT( 1 )
      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
      K = N - CTOT( 4 )
 *
 *     Fill out the INDXC array so that the permutation which it induces
 *     will place all type-1 columns first, all type-2 columns next,
 *     then all type-3's, and finally all type-4's.
 *
      DO 130 J = 1, N
         JS = INDXP( J )
         CT = COLTYP( JS )
         INDX( PSM( CT ) ) = JS
         INDXC( PSM( CT ) ) = J
         PSM( CT ) = PSM( CT ) + 1
  130 CONTINUE
 *
 *     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
 *     and Q2 respectively.  The eigenvalues/vectors which were not
 *     deflated go into the first K slots of DLAMDA and Q2 respectively,
 *     while those which were deflated go into the last N - K slots.
 *
      I = 1
      IQ1 = 1
      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
      DO 140 J = 1, CTOT( 1 )
         JS = INDX( I )
         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
         Z( I ) = D( JS )
         I = I + 1
         IQ1 = IQ1 + N1
  140 CONTINUE
 *
      DO 150 J = 1, CTOT( 2 )
         JS = INDX( I )
         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
         Z( I ) = D( JS )
         I = I + 1
         IQ1 = IQ1 + N1
         IQ2 = IQ2 + N2
  150 CONTINUE
 *
      DO 160 J = 1, CTOT( 3 )
         JS = INDX( I )
         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
         Z( I ) = D( JS )
         I = I + 1
         IQ2 = IQ2 + N2
  160 CONTINUE
 *
      IQ1 = IQ2
      DO 170 J = 1, CTOT( 4 )
         JS = INDX( I )
         CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
         IQ2 = IQ2 + N
         Z( I ) = D( JS )
         I = I + 1
  170 CONTINUE
 *
 *     The deflated eigenvalues and their corresponding vectors go back
 *     into the last N - K slots of D and Q respectively.
 *
      IF( K.LT.N ) THEN
         CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
     $                Q( 1, K+1 ), LDQ )
         CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
      END IF
 *
 *     Copy CTOT into COLTYP for referencing in DLAED3.
 *
      DO 180 J = 1, 4
         COLTYP( J ) = CTOT( J )
  180 CONTINUE
 *
  190 CONTINUE
      RETURN
 *
 *     End of DLAED2
 *
      END
--- a/lib/linalg/dlaed3.cpp
+++ b/lib/linalg/dlaed3.cpp
@ -0,0 +1,138 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__1 = 1;
 static doublereal c_b22 = 1.;
 static doublereal c_b23 = 0.;
 int dlaed3_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
            doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot,
            doublereal *w, doublereal *s, integer *info)
 {
    integer q_dim1, q_offset, i__1, i__2;
    doublereal d__1;
    double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
    integer i__, j, n2, n12, ii, n23, iq2;
    doublereal temp;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen),
        dcopy_(integer *, doublereal *, integer *, doublereal *, integer *),
        dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *,
                doublereal *, integer *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
                       integer *, ftnlen),
        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
                ftnlen),
        xerbla_(char *, integer *, ftnlen);
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --dlamda;
    --q2;
    --indx;
    --ctot;
    --w;
    --s;
    *info = 0;
    if (*k < 0) {
        *info = -1;
    } else if (*n < *k) {
        *info = -2;
    } else if (*ldq < max(1, *n)) {
        *info = -6;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DLAED3", &i__1, (ftnlen)6);
        return 0;
    }
    if (*k == 0) {
        return 0;
    }
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
    }
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
        dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info);
        if (*info != 0) {
            goto L120;
        }
    }
    if (*k == 1) {
        goto L110;
    }
    if (*k == 2) {
        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            w[1] = q[j * q_dim1 + 1];
            w[2] = q[j * q_dim1 + 2];
            ii = indx[1];
            q[j * q_dim1 + 1] = w[ii];
            ii = indx[2];
            q[j * q_dim1 + 2] = w[ii];
        }
        goto L110;
    }
    dcopy_(k, &w[1], &c__1, &s[1], &c__1);
    i__1 = *ldq + 1;
    dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
        i__2 = j - 1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
        }
        i__2 = *k;
        for (i__ = j + 1; i__ <= i__2; ++i__) {
            w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
        }
    }
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
        d__1 = sqrt(-w[i__]);
        w[i__] = d_lmp_sign(&d__1, &s[i__]);
    }
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
        i__2 = *k;
        for (i__ = 1; i__ <= i__2; ++i__) {
            s[i__] = w[i__] / q[i__ + j * q_dim1];
        }
        temp = dnrm2_(k, &s[1], &c__1);
        i__2 = *k;
        for (i__ = 1; i__ <= i__2; ++i__) {
            ii = indx[i__];
            q[i__ + j * q_dim1] = s[ii] / temp;
        }
    }
 L110:
    n2 = *n - *n1;
    n12 = ctot[1] + ctot[2];
    n23 = ctot[2] + ctot[3];
    dlacpy_((char *)"A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23, (ftnlen)1);
    iq2 = *n1 * n12 + 1;
    if (n23 != 0) {
        dgemm_((char *)"N", (char *)"N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &c_b23,
               &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1, (ftnlen)1);
    } else {
        dlaset_((char *)"A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1);
    }
    dlacpy_((char *)"A", &n12, k, &q[q_offset], ldq, &s[1], &n12, (ftnlen)1);
    if (n12 != 0) {
        dgemm_((char *)"N", (char *)"N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq,
               (ftnlen)1, (ftnlen)1);
    } else {
        dlaset_((char *)"A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq, (ftnlen)1);
    }
 L120:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed3.f
+++ b/lib/linalg/dlaed3.f
@ -1,350 +0,0 @@
 *> \brief \b DLAED3 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED3 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
 *                          CTOT, W, S, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDQ, N, N1
 *       DOUBLE PRECISION   RHO
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            CTOT( * ), INDX( * )
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 *      $                   S( * ), W( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAED3 finds the roots of the secular equation, as defined by the
 *> values in D, W, and RHO, between 1 and K.  It makes the
 *> appropriate calls to DLAED4 and then updates the eigenvectors by
 *> multiplying the matrix of eigenvectors of the pair of eigensystems
 *> being combined by the matrix of eigenvectors of the K-by-K system
 *> which is solved here.
 *>
 *> This code makes very mild assumptions about floating point
 *> arithmetic. It will work on machines with a guard digit in
 *> add/subtract, or on those binary machines without guard digits
 *> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
 *> It could conceivably fail on hexadecimal or decimal machines
 *> without guard digits, but we know of none.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>          The number of terms in the rational function to be solved by
 *>          DLAED4.  K >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of rows and columns in the Q matrix.
 *>          N >= K (deflation may result in N>K).
 *> \endverbatim
 *>
 *> \param[in] N1
 *> \verbatim
 *>          N1 is INTEGER
 *>          The location of the last eigenvalue in the leading submatrix.
 *>          min(1,N) <= N1 <= N/2.
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>          D(I) contains the updated eigenvalues for
 *>          1 <= I <= K.
 *> \endverbatim
 *>
 *> \param[out] Q
 *> \verbatim
 *>          Q is DOUBLE PRECISION array, dimension (LDQ,N)
 *>          Initially the first K columns are used as workspace.
 *>          On output the columns 1 to K contain
 *>          the updated eigenvectors.
 *> \endverbatim
 *>
 *> \param[in] LDQ
 *> \verbatim
 *>          LDQ is INTEGER
 *>          The leading dimension of the array Q.  LDQ >= max(1,N).
 *> \endverbatim
 *>
 *> \param[in] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>          The value of the parameter in the rank one update equation.
 *>          RHO >= 0 required.
 *> \endverbatim
 *>
 *> \param[in,out] DLAMDA
 *> \verbatim
 *>          DLAMDA is DOUBLE PRECISION array, dimension (K)
 *>          The first K elements of this array contain the old roots
 *>          of the deflated updating problem.  These are the poles
 *>          of the secular equation. May be changed on output by
 *>          having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
 *>          Cray-2, or Cray C-90, as described above.
 *> \endverbatim
 *>
 *> \param[in] Q2
 *> \verbatim
 *>          Q2 is DOUBLE PRECISION array, dimension (LDQ2*N)
 *>          The first K columns of this matrix contain the non-deflated
 *>          eigenvectors for the split problem.
 *> \endverbatim
 *>
 *> \param[in] INDX
 *> \verbatim
 *>          INDX is INTEGER array, dimension (N)
 *>          The permutation used to arrange the columns of the deflated
 *>          Q matrix into three groups (see DLAED2).
 *>          The rows of the eigenvectors found by DLAED4 must be likewise
 *>          permuted before the matrix multiply can take place.
 *> \endverbatim
 *>
 *> \param[in] CTOT
 *> \verbatim
 *>          CTOT is INTEGER array, dimension (4)
 *>          A count of the total number of the various types of columns
 *>          in Q, as described in INDX.  The fourth column type is any
 *>          column which has been deflated.
 *> \endverbatim
 *>
 *> \param[in,out] W
 *> \verbatim
 *>          W is DOUBLE PRECISION array, dimension (K)
 *>          The first K elements of this array contain the components
 *>          of the deflation-adjusted updating vector. Destroyed on
 *>          output.
 *> \endverbatim
 *>
 *> \param[out] S
 *> \verbatim
 *>          S is DOUBLE PRECISION array, dimension (N1 + 1)*K
 *>          Will contain the eigenvectors of the repaired matrix which
 *>          will be multiplied by the previously accumulated eigenvectors
 *>          to update the system.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit.
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 *>          > 0:  if INFO = 1, an eigenvalue did not converge
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> Jeff Rutter, Computer Science Division, University of California
 *> at Berkeley, USA \n
 *>  Modified by Francoise Tisseur, University of Tennessee
 *>
 *  =====================================================================
      SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
     $                   CTOT, W, S, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, K, LDQ, N, N1
      DOUBLE PRECISION   RHO
 *     ..
 *     .. Array Arguments ..
      INTEGER            CTOT( * ), INDX( * )
      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
     $                   S( * ), W( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, II, IQ2, J, N12, N2, N23
      DOUBLE PRECISION   TEMP
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMC3, DNRM2
      EXTERNAL           DLAMC3, DNRM2
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, SIGN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
 *
      IF( K.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.K ) THEN
         INFO = -2
      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
         INFO = -6
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAED3', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( K.EQ.0 )
     $   RETURN
 *
 *     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
 *     be computed with high relative accuracy (barring over/underflow).
 *     This is a problem on machines without a guard digit in
 *     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
 *     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
 *     which on any of these machines zeros out the bottommost
 *     bit of DLAMDA(I) if it is 1; this makes the subsequent
 *     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
 *     occurs. On binary machines with a guard digit (almost all
 *     machines) it does not change DLAMDA(I) at all. On hexadecimal
 *     and decimal machines with a guard digit, it slightly
 *     changes the bottommost bits of DLAMDA(I). It does not account
 *     for hexadecimal or decimal machines without guard digits
 *     (we know of none). We use a subroutine call to compute
 *     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
 *     this code.
 *
      DO 10 I = 1, K
         DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
   10 CONTINUE
 *
      DO 20 J = 1, K
         CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
 *
 *        If the zero finder fails, the computation is terminated.
 *
         IF( INFO.NE.0 )
     $      GO TO 120
   20 CONTINUE
 *
      IF( K.EQ.1 )
     $   GO TO 110
      IF( K.EQ.2 ) THEN
         DO 30 J = 1, K
            W( 1 ) = Q( 1, J )
            W( 2 ) = Q( 2, J )
            II = INDX( 1 )
            Q( 1, J ) = W( II )
            II = INDX( 2 )
            Q( 2, J ) = W( II )
   30    CONTINUE
         GO TO 110
      END IF
 *
 *     Compute updated W.
 *
      CALL DCOPY( K, W, 1, S, 1 )
 *
 *     Initialize W(I) = Q(I,I)
 *
      CALL DCOPY( K, Q, LDQ+1, W, 1 )
      DO 60 J = 1, K
         DO 40 I = 1, J - 1
            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
   40    CONTINUE
         DO 50 I = J + 1, K
            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
   50    CONTINUE
   60 CONTINUE
      DO 70 I = 1, K
         W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
   70 CONTINUE
 *
 *     Compute eigenvectors of the modified rank-1 modification.
 *
      DO 100 J = 1, K
         DO 80 I = 1, K
            S( I ) = W( I ) / Q( I, J )
   80    CONTINUE
         TEMP = DNRM2( K, S, 1 )
         DO 90 I = 1, K
            II = INDX( I )
            Q( I, J ) = S( II ) / TEMP
   90    CONTINUE
  100 CONTINUE
 *
 *     Compute the updated eigenvectors.
 *
  110 CONTINUE
 *
      N2 = N - N1
      N12 = CTOT( 1 ) + CTOT( 2 )
      N23 = CTOT( 2 ) + CTOT( 3 )
 *
      CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
      IQ2 = N1*N12 + 1
      IF( N23.NE.0 ) THEN
         CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
     $               ZERO, Q( N1+1, 1 ), LDQ )
      ELSE
         CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
      END IF
 *
      CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
      IF( N12.NE.0 ) THEN
         CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
     $               LDQ )
      ELSE
         CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
      END IF
 *
 *
  120 CONTINUE
      RETURN
 *
 *     End of DLAED3
 *
      END
--- a/lib/linalg/dlaed4.cpp
+++ b/lib/linalg/dlaed4.cpp
@ -0,0 +1,571 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta,
            doublereal *rho, doublereal *dlam, integer *info)
 {
    integer i__1;
    doublereal d__1;
    double sqrt(doublereal);
    doublereal a, b, c__;
    integer j;
    doublereal w;
    integer ii;
    doublereal dw, zz[3];
    integer ip1;
    doublereal del, eta, phi, eps, tau, psi;
    integer iim1, iip1;
    doublereal dphi, dpsi;
    integer iter;
    doublereal temp, prew, temp1, dltlb, dltub, midpt;
    integer niter;
    logical swtch;
    extern int dlaed5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *,
                       doublereal *),
        dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *,
                doublereal *, integer *);
    logical swtch3;
    extern doublereal dlamch_(char *, ftnlen);
    logical orgati;
    doublereal erretm, rhoinv;
    --delta;
    --z__;
    --d__;
    *info = 0;
    if (*n == 1) {
        *dlam = d__[1] + *rho * z__[1] * z__[1];
        delta[1] = 1.;
        return 0;
    }
    if (*n == 2) {
        dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
        return 0;
    }
    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
    rhoinv = 1. / *rho;
    if (*i__ == *n) {
        ii = *n - 1;
        niter = 1;
        midpt = *rho / 2.;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            delta[j] = d__[j] - d__[*i__] - midpt;
        }
        psi = 0.;
        i__1 = *n - 2;
        for (j = 1; j <= i__1; ++j) {
            psi += z__[j] * z__[j] / delta[j];
        }
        c__ = rhoinv + psi;
        w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*n];
        if (w <= 0.) {
            temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) +
                   z__[*n] * z__[*n] / *rho;
            if (c__ <= temp) {
                tau = *rho;
            } else {
                del = d__[*n] - d__[*n - 1];
                a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
                b = z__[*n] * z__[*n] * del;
                if (a < 0.) {
                    tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
                } else {
                    tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
                }
            }
            dltlb = midpt;
            dltub = *rho;
        } else {
            del = d__[*n] - d__[*n - 1];
            a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
            b = z__[*n] * z__[*n] * del;
            if (a < 0.) {
                tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
            } else {
                tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
            }
            dltlb = 0.;
            dltub = midpt;
        }
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            delta[j] = d__[j] - d__[*i__] - tau;
        }
        dpsi = 0.;
        psi = 0.;
        erretm = 0.;
        i__1 = ii;
        for (j = 1; j <= i__1; ++j) {
            temp = z__[j] / delta[j];
            psi += z__[j] * temp;
            dpsi += temp * temp;
            erretm += psi;
        }
        erretm = abs(erretm);
        temp = z__[*n] / delta[*n];
        phi = z__[*n] * temp;
        dphi = temp * temp;
        erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
        w = rhoinv + phi + psi;
        if (abs(w) <= eps * erretm) {
            *dlam = d__[*i__] + tau;
            goto L250;
        }
        if (w <= 0.) {
            dltlb = max(dltlb, tau);
        } else {
            dltub = min(dltub, tau);
        }
        ++niter;
        c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
        a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
        b = delta[*n - 1] * delta[*n] * w;
        if (c__ < 0.) {
            c__ = abs(c__);
        }
        if (c__ == 0.) {
            eta = -w / (dpsi + dphi);
        } else if (a >= 0.) {
            eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
        } else {
            eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
        }
        if (w * eta > 0.) {
            eta = -w / (dpsi + dphi);
        }
        temp = tau + eta;
        if (temp > dltub || temp < dltlb) {
            if (w < 0.) {
                eta = (dltub - tau) / 2.;
            } else {
                eta = (dltlb - tau) / 2.;
            }
        }
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            delta[j] -= eta;
        }
        tau += eta;
        dpsi = 0.;
        psi = 0.;
        erretm = 0.;
        i__1 = ii;
        for (j = 1; j <= i__1; ++j) {
            temp = z__[j] / delta[j];
            psi += z__[j] * temp;
            dpsi += temp * temp;
            erretm += psi;
        }
        erretm = abs(erretm);
        temp = z__[*n] / delta[*n];
        phi = z__[*n] * temp;
        dphi = temp * temp;
        erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
        w = rhoinv + phi + psi;
        iter = niter + 1;
        for (niter = iter; niter <= 30; ++niter) {
            if (abs(w) <= eps * erretm) {
                *dlam = d__[*i__] + tau;
                goto L250;
            }
            if (w <= 0.) {
                dltlb = max(dltlb, tau);
            } else {
                dltub = min(dltub, tau);
            }
            c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
            a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
            b = delta[*n - 1] * delta[*n] * w;
            if (a >= 0.) {
                eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
            } else {
                eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
            }
            if (w * eta > 0.) {
                eta = -w / (dpsi + dphi);
            }
            temp = tau + eta;
            if (temp > dltub || temp < dltlb) {
                if (w < 0.) {
                    eta = (dltub - tau) / 2.;
                } else {
                    eta = (dltlb - tau) / 2.;
                }
            }
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                delta[j] -= eta;
            }
            tau += eta;
            dpsi = 0.;
            psi = 0.;
            erretm = 0.;
            i__1 = ii;
            for (j = 1; j <= i__1; ++j) {
                temp = z__[j] / delta[j];
                psi += z__[j] * temp;
                dpsi += temp * temp;
                erretm += psi;
            }
            erretm = abs(erretm);
            temp = z__[*n] / delta[*n];
            phi = z__[*n] * temp;
            dphi = temp * temp;
            erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
            w = rhoinv + phi + psi;
        }
        *info = 1;
        *dlam = d__[*i__] + tau;
        goto L250;
    } else {
        niter = 1;
        ip1 = *i__ + 1;
        del = d__[ip1] - d__[*i__];
        midpt = del / 2.;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            delta[j] = d__[j] - d__[*i__] - midpt;
        }
        psi = 0.;
        i__1 = *i__ - 1;
        for (j = 1; j <= i__1; ++j) {
            psi += z__[j] * z__[j] / delta[j];
        }
        phi = 0.;
        i__1 = *i__ + 2;
        for (j = *n; j >= i__1; --j) {
            phi += z__[j] * z__[j] / delta[j];
        }
        c__ = rhoinv + psi + phi;
        w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1];
        if (w > 0.) {
            orgati = TRUE_;
            a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
            b = z__[*i__] * z__[*i__] * del;
            if (a > 0.) {
                tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
            } else {
                tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
            }
            dltlb = 0.;
            dltub = midpt;
        } else {
            orgati = FALSE_;
            a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
            b = z__[ip1] * z__[ip1] * del;
            if (a < 0.) {
                tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(d__1))));
            } else {
                tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.);
            }
            dltlb = -midpt;
            dltub = 0.;
        }
        if (orgati) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                delta[j] = d__[j] - d__[*i__] - tau;
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                delta[j] = d__[j] - d__[ip1] - tau;
            }
        }
        if (orgati) {
            ii = *i__;
        } else {
            ii = *i__ + 1;
        }
        iim1 = ii - 1;
        iip1 = ii + 1;
        dpsi = 0.;
        psi = 0.;
        erretm = 0.;
        i__1 = iim1;
        for (j = 1; j <= i__1; ++j) {
            temp = z__[j] / delta[j];
            psi += z__[j] * temp;
            dpsi += temp * temp;
            erretm += psi;
        }
        erretm = abs(erretm);
        dphi = 0.;
        phi = 0.;
        i__1 = iip1;
        for (j = *n; j >= i__1; --j) {
            temp = z__[j] / delta[j];
            phi += z__[j] * temp;
            dphi += temp * temp;
            erretm += phi;
        }
        w = rhoinv + phi + psi;
        swtch3 = FALSE_;
        if (orgati) {
            if (w < 0.) {
                swtch3 = TRUE_;
            }
        } else {
            if (w > 0.) {
                swtch3 = TRUE_;
            }
        }
        if (ii == 1 || ii == *n) {
            swtch3 = FALSE_;
        }
        temp = z__[ii] / delta[ii];
        dw = dpsi + dphi + temp * temp;
        temp = z__[ii] * temp;
        w += temp;
        erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
        if (abs(w) <= eps * erretm) {
            if (orgati) {
                *dlam = d__[*i__] + tau;
            } else {
                *dlam = d__[ip1] + tau;
            }
            goto L250;
        }
        if (w <= 0.) {
            dltlb = max(dltlb, tau);
        } else {
            dltub = min(dltub, tau);
        }
        ++niter;
        if (!swtch3) {
            if (orgati) {
                d__1 = z__[*i__] / delta[*i__];
                c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
            } else {
                d__1 = z__[ip1] / delta[ip1];
                c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
            }
            a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
            b = delta[*i__] * delta[ip1] * w;
            if (c__ == 0.) {
                if (a == 0.) {
                    if (orgati) {
                        a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
                    } else {
                        a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
                    }
                }
                eta = b / a;
            } else if (a <= 0.) {
                eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
            } else {
                eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
            }
        } else {
            temp = rhoinv + psi + phi;
            if (orgati) {
                temp1 = z__[iim1] / delta[iim1];
                temp1 *= temp1;
                c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
                zz[0] = z__[iim1] * z__[iim1];
                zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
            } else {
                temp1 = z__[iip1] / delta[iip1];
                temp1 *= temp1;
                c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
                zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
                zz[2] = z__[iip1] * z__[iip1];
            }
            zz[1] = z__[ii] * z__[ii];
            dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
            if (*info != 0) {
                goto L250;
            }
        }
        if (w * eta >= 0.) {
            eta = -w / dw;
        }
        temp = tau + eta;
        if (temp > dltub || temp < dltlb) {
            if (w < 0.) {
                eta = (dltub - tau) / 2.;
            } else {
                eta = (dltlb - tau) / 2.;
            }
        }
        prew = w;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            delta[j] -= eta;
        }
        dpsi = 0.;
        psi = 0.;
        erretm = 0.;
        i__1 = iim1;
        for (j = 1; j <= i__1; ++j) {
            temp = z__[j] / delta[j];
            psi += z__[j] * temp;
            dpsi += temp * temp;
            erretm += psi;
        }
        erretm = abs(erretm);
        dphi = 0.;
        phi = 0.;
        i__1 = iip1;
        for (j = *n; j >= i__1; --j) {
            temp = z__[j] / delta[j];
            phi += z__[j] * temp;
            dphi += temp * temp;
            erretm += phi;
        }
        temp = z__[ii] / delta[ii];
        dw = dpsi + dphi + temp * temp;
        temp = z__[ii] * temp;
        w = rhoinv + phi + psi + temp;
        erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
                 (d__1 = tau + eta, abs(d__1)) * dw;
        swtch = FALSE_;
        if (orgati) {
            if (-w > abs(prew) / 10.) {
                swtch = TRUE_;
            }
        } else {
            if (w > abs(prew) / 10.) {
                swtch = TRUE_;
            }
        }
        tau += eta;
        iter = niter + 1;
        for (niter = iter; niter <= 30; ++niter) {
            if (abs(w) <= eps * erretm) {
                if (orgati) {
                    *dlam = d__[*i__] + tau;
                } else {
                    *dlam = d__[ip1] + tau;
                }
                goto L250;
            }
            if (w <= 0.) {
                dltlb = max(dltlb, tau);
            } else {
                dltub = min(dltub, tau);
            }
            if (!swtch3) {
                if (!swtch) {
                    if (orgati) {
                        d__1 = z__[*i__] / delta[*i__];
                        c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
                    } else {
                        d__1 = z__[ip1] / delta[ip1];
                        c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
                    }
                } else {
                    temp = z__[ii] / delta[ii];
                    if (orgati) {
                        dpsi += temp * temp;
                    } else {
                        dphi += temp * temp;
                    }
                    c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
                }
                a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
                b = delta[*i__] * delta[ip1] * w;
                if (c__ == 0.) {
                    if (a == 0.) {
                        if (!swtch) {
                            if (orgati) {
                                a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
                            } else {
                                a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
                            }
                        } else {
                            a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] * delta[ip1] * dphi;
                        }
                    }
                    eta = b / a;
                } else if (a <= 0.) {
                    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
                } else {
                    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
                }
            } else {
                temp = rhoinv + psi + phi;
                if (swtch) {
                    c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
                    zz[0] = delta[iim1] * delta[iim1] * dpsi;
                    zz[2] = delta[iip1] * delta[iip1] * dphi;
                } else {
                    if (orgati) {
                        temp1 = z__[iim1] / delta[iim1];
                        temp1 *= temp1;
                        c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
                        zz[0] = z__[iim1] * z__[iim1];
                        zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
                    } else {
                        temp1 = z__[iip1] / delta[iip1];
                        temp1 *= temp1;
                        c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
                        zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
                        zz[2] = z__[iip1] * z__[iip1];
                    }
                }
                dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
                if (*info != 0) {
                    goto L250;
                }
            }
            if (w * eta >= 0.) {
                eta = -w / dw;
            }
            temp = tau + eta;
            if (temp > dltub || temp < dltlb) {
                if (w < 0.) {
                    eta = (dltub - tau) / 2.;
                } else {
                    eta = (dltlb - tau) / 2.;
                }
            }
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                delta[j] -= eta;
            }
            tau += eta;
            prew = w;
            dpsi = 0.;
            psi = 0.;
            erretm = 0.;
            i__1 = iim1;
            for (j = 1; j <= i__1; ++j) {
                temp = z__[j] / delta[j];
                psi += z__[j] * temp;
                dpsi += temp * temp;
                erretm += psi;
            }
            erretm = abs(erretm);
            dphi = 0.;
            phi = 0.;
            i__1 = iip1;
            for (j = *n; j >= i__1; --j) {
                temp = z__[j] / delta[j];
                phi += z__[j] * temp;
                dphi += temp * temp;
                erretm += phi;
            }
            temp = z__[ii] / delta[ii];
            dw = dpsi + dphi + temp * temp;
            temp = z__[ii] * temp;
            w = rhoinv + phi + psi + temp;
            erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
            if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
                swtch = !swtch;
            }
        }
        *info = 1;
        if (orgati) {
            *dlam = d__[*i__] + tau;
        } else {
            *dlam = d__[ip1] + tau;
        }
    }
 L250:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed4.f
+++ b/lib/linalg/dlaed4.f
@ -1,917 +0,0 @@
 *> \brief \b DLAED4 used by DSTEDC. Finds a single root of the secular equation.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED4 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            I, INFO, N
 *       DOUBLE PRECISION   DLAM, RHO
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> This subroutine computes the I-th updated eigenvalue of a symmetric
 *> rank-one modification to a diagonal matrix whose elements are
 *> given in the array d, and that
 *>
 *>            D(i) < D(j)  for  i < j
 *>
 *> and that RHO > 0.  This is arranged by the calling routine, and is
 *> no loss in generality.  The rank-one modified system is thus
 *>
 *>            diag( D )  +  RHO * Z * Z_transpose.
 *>
 *> where we assume the Euclidean norm of Z is 1.
 *>
 *> The method consists of approximating the rational functions in the
 *> secular equation by simpler interpolating rational functions.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>         The length of all arrays.
 *> \endverbatim
 *>
 *> \param[in] I
 *> \verbatim
 *>          I is INTEGER
 *>         The index of the eigenvalue to be computed.  1 <= I <= N.
 *> \endverbatim
 *>
 *> \param[in] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (N)
 *>         The original eigenvalues.  It is assumed that they are in
 *>         order, D(I) < D(J)  for I < J.
 *> \endverbatim
 *>
 *> \param[in] Z
 *> \verbatim
 *>          Z is DOUBLE PRECISION array, dimension (N)
 *>         The components of the updating vector.
 *> \endverbatim
 *>
 *> \param[out] DELTA
 *> \verbatim
 *>          DELTA is DOUBLE PRECISION array, dimension (N)
 *>         If N > 2, DELTA contains (D(j) - lambda_I) in its  j-th
 *>         component.  If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
 *>         for detail. The vector DELTA contains the information necessary
 *>         to construct the eigenvectors by DLAED3 and DLAED9.
 *> \endverbatim
 *>
 *> \param[in] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>         The scalar in the symmetric updating formula.
 *> \endverbatim
 *>
 *> \param[out] DLAM
 *> \verbatim
 *>          DLAM is DOUBLE PRECISION
 *>         The computed lambda_I, the I-th updated eigenvalue.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>         = 0:  successful exit
 *>         > 0:  if INFO = 1, the updating process failed.
 *> \endverbatim
 *
 *> \par Internal Parameters:
 *  =========================
 *>
 *> \verbatim
 *>  Logical variable ORGATI (origin-at-i?) is used for distinguishing
 *>  whether D(i) or D(i+1) is treated as the origin.
 *>
 *>            ORGATI = .true.    origin at i
 *>            ORGATI = .false.   origin at i+1
 *>
 *>   Logical variable SWTCH3 (switch-for-3-poles?) is for noting
 *>   if we are working with THREE poles!
 *>
 *>   MAXIT is the maximum number of iterations allowed for each
 *>   eigenvalue.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *>     Ren-Cang Li, Computer Science Division, University of California
 *>     at Berkeley, USA
 *>
 *  =====================================================================
      SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            I, INFO, N
      DOUBLE PRECISION   DLAM, RHO
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      INTEGER            MAXIT
      PARAMETER          ( MAXIT = 30 )
      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0,
     $                   TEN = 10.0D0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            ORGATI, SWTCH, SWTCH3
      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
      DOUBLE PRECISION   A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
     $                   EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
     $                   RHOINV, TAU, TEMP, TEMP1, W
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   ZZ( 3 )
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLAED5, DLAED6
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Since this routine is called in an inner loop, we do no argument
 *     checking.
 *
 *     Quick return for N=1 and 2.
 *
      INFO = 0
      IF( N.EQ.1 ) THEN
 *
 *         Presumably, I=1 upon entry
 *
         DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
         DELTA( 1 ) = ONE
         RETURN
      END IF
      IF( N.EQ.2 ) THEN
         CALL DLAED5( I, D, Z, DELTA, RHO, DLAM )
         RETURN
      END IF
 *
 *     Compute machine epsilon
 *
      EPS = DLAMCH( 'Epsilon' )
      RHOINV = ONE / RHO
 *
 *     The case I = N
 *
      IF( I.EQ.N ) THEN
 *
 *        Initialize some basic variables
 *
         II = N - 1
         NITER = 1
 *
 *        Calculate initial guess
 *
         MIDPT = RHO / TWO
 *
 *        If ||Z||_2 is not one, then TEMP should be set to
 *        RHO * ||Z||_2^2 / TWO
 *
         DO 10 J = 1, N
            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
   10    CONTINUE
 *
         PSI = ZERO
         DO 20 J = 1, N - 2
            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
   20    CONTINUE
 *
         C = RHOINV + PSI
         W = C + Z( II )*Z( II ) / DELTA( II ) +
     $       Z( N )*Z( N ) / DELTA( N )
 *
         IF( W.LE.ZERO ) THEN
            TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
     $             Z( N )*Z( N ) / RHO
            IF( C.LE.TEMP ) THEN
               TAU = RHO
            ELSE
               DEL = D( N ) - D( N-1 )
               A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
               B = Z( N )*Z( N )*DEL
               IF( A.LT.ZERO ) THEN
                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
               ELSE
                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
               END IF
            END IF
 *
 *           It can be proved that
 *               D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
 *
            DLTLB = MIDPT
            DLTUB = RHO
         ELSE
            DEL = D( N ) - D( N-1 )
            A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
            B = Z( N )*Z( N )*DEL
            IF( A.LT.ZERO ) THEN
               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
            ELSE
               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
            END IF
 *
 *           It can be proved that
 *               D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
 *
            DLTLB = ZERO
            DLTUB = MIDPT
         END IF
 *
         DO 30 J = 1, N
            DELTA( J ) = ( D( J )-D( I ) ) - TAU
   30    CONTINUE
 *
 *        Evaluate PSI and the derivative DPSI
 *
         DPSI = ZERO
         PSI = ZERO
         ERRETM = ZERO
         DO 40 J = 1, II
            TEMP = Z( J ) / DELTA( J )
            PSI = PSI + Z( J )*TEMP
            DPSI = DPSI + TEMP*TEMP
            ERRETM = ERRETM + PSI
   40    CONTINUE
         ERRETM = ABS( ERRETM )
 *
 *        Evaluate PHI and the derivative DPHI
 *
         TEMP = Z( N ) / DELTA( N )
         PHI = Z( N )*TEMP
         DPHI = TEMP*TEMP
         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
     $            ABS( TAU )*( DPSI+DPHI )
 *
         W = RHOINV + PHI + PSI
 *
 *        Test for convergence
 *
         IF( ABS( W ).LE.EPS*ERRETM ) THEN
            DLAM = D( I ) + TAU
            GO TO 250
         END IF
 *
         IF( W.LE.ZERO ) THEN
            DLTLB = MAX( DLTLB, TAU )
         ELSE
            DLTUB = MIN( DLTUB, TAU )
         END IF
 *
 *        Calculate the new step
 *
         NITER = NITER + 1
         C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
         A = ( DELTA( N-1 )+DELTA( N ) )*W -
     $       DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
         B = DELTA( N-1 )*DELTA( N )*W
         IF( C.LT.ZERO )
     $      C = ABS( C )
         IF( C.EQ.ZERO ) THEN
 *           ETA = B/A
 *           ETA = RHO - TAU
 *           ETA = DLTUB - TAU
 *
 *           Update proposed by Li, Ren-Cang:
            ETA = -W / ( DPSI+DPHI )
         ELSE IF( A.GE.ZERO ) THEN
            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
         ELSE
            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
         END IF
 *
 *        Note, eta should be positive if w is negative, and
 *        eta should be negative otherwise. However,
 *        if for some reason caused by roundoff, eta*w > 0,
 *        we simply use one Newton step instead. This way
 *        will guarantee eta*w < 0.
 *
         IF( W*ETA.GT.ZERO )
     $      ETA = -W / ( DPSI+DPHI )
         TEMP = TAU + ETA
         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
            IF( W.LT.ZERO ) THEN
               ETA = ( DLTUB-TAU ) / TWO
            ELSE
               ETA = ( DLTLB-TAU ) / TWO
            END IF
         END IF
         DO 50 J = 1, N
            DELTA( J ) = DELTA( J ) - ETA
   50    CONTINUE
 *
         TAU = TAU + ETA
 *
 *        Evaluate PSI and the derivative DPSI
 *
         DPSI = ZERO
         PSI = ZERO
         ERRETM = ZERO
         DO 60 J = 1, II
            TEMP = Z( J ) / DELTA( J )
            PSI = PSI + Z( J )*TEMP
            DPSI = DPSI + TEMP*TEMP
            ERRETM = ERRETM + PSI
   60    CONTINUE
         ERRETM = ABS( ERRETM )
 *
 *        Evaluate PHI and the derivative DPHI
 *
         TEMP = Z( N ) / DELTA( N )
         PHI = Z( N )*TEMP
         DPHI = TEMP*TEMP
         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
     $            ABS( TAU )*( DPSI+DPHI )
 *
         W = RHOINV + PHI + PSI
 *
 *        Main loop to update the values of the array   DELTA
 *
         ITER = NITER + 1
 *
         DO 90 NITER = ITER, MAXIT
 *
 *           Test for convergence
 *
            IF( ABS( W ).LE.EPS*ERRETM ) THEN
               DLAM = D( I ) + TAU
               GO TO 250
            END IF
 *
            IF( W.LE.ZERO ) THEN
               DLTLB = MAX( DLTLB, TAU )
            ELSE
               DLTUB = MIN( DLTUB, TAU )
            END IF
 *
 *           Calculate the new step
 *
            C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
            A = ( DELTA( N-1 )+DELTA( N ) )*W -
     $          DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
            B = DELTA( N-1 )*DELTA( N )*W
            IF( A.GE.ZERO ) THEN
               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
            ELSE
               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
            END IF
 *
 *           Note, eta should be positive if w is negative, and
 *           eta should be negative otherwise. However,
 *           if for some reason caused by roundoff, eta*w > 0,
 *           we simply use one Newton step instead. This way
 *           will guarantee eta*w < 0.
 *
            IF( W*ETA.GT.ZERO )
     $         ETA = -W / ( DPSI+DPHI )
            TEMP = TAU + ETA
            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
               IF( W.LT.ZERO ) THEN
                  ETA = ( DLTUB-TAU ) / TWO
               ELSE
                  ETA = ( DLTLB-TAU ) / TWO
               END IF
            END IF
            DO 70 J = 1, N
               DELTA( J ) = DELTA( J ) - ETA
   70       CONTINUE
 *
            TAU = TAU + ETA
 *
 *           Evaluate PSI and the derivative DPSI
 *
            DPSI = ZERO
            PSI = ZERO
            ERRETM = ZERO
            DO 80 J = 1, II
               TEMP = Z( J ) / DELTA( J )
               PSI = PSI + Z( J )*TEMP
               DPSI = DPSI + TEMP*TEMP
               ERRETM = ERRETM + PSI
   80       CONTINUE
            ERRETM = ABS( ERRETM )
 *
 *           Evaluate PHI and the derivative DPHI
 *
            TEMP = Z( N ) / DELTA( N )
            PHI = Z( N )*TEMP
            DPHI = TEMP*TEMP
            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
     $               ABS( TAU )*( DPSI+DPHI )
 *
            W = RHOINV + PHI + PSI
   90    CONTINUE
 *
 *        Return with INFO = 1, NITER = MAXIT and not converged
 *
         INFO = 1
         DLAM = D( I ) + TAU
         GO TO 250
 *
 *        End for the case I = N
 *
      ELSE
 *
 *        The case for I < N
 *
         NITER = 1
         IP1 = I + 1
 *
 *        Calculate initial guess
 *
         DEL = D( IP1 ) - D( I )
         MIDPT = DEL / TWO
         DO 100 J = 1, N
            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
  100    CONTINUE
 *
         PSI = ZERO
         DO 110 J = 1, I - 1
            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
  110    CONTINUE
 *
         PHI = ZERO
         DO 120 J = N, I + 2, -1
            PHI = PHI + Z( J )*Z( J ) / DELTA( J )
  120    CONTINUE
         C = RHOINV + PSI + PHI
         W = C + Z( I )*Z( I ) / DELTA( I ) +
     $       Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
 *
         IF( W.GT.ZERO ) THEN
 *
 *           d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
 *
 *           We choose d(i) as origin.
 *
            ORGATI = .TRUE.
            A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
            B = Z( I )*Z( I )*DEL
            IF( A.GT.ZERO ) THEN
               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
            ELSE
               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
            END IF
            DLTLB = ZERO
            DLTUB = MIDPT
         ELSE
 *
 *           (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
 *
 *           We choose d(i+1) as origin.
 *
            ORGATI = .FALSE.
            A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
            B = Z( IP1 )*Z( IP1 )*DEL
            IF( A.LT.ZERO ) THEN
               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
            ELSE
               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
            END IF
            DLTLB = -MIDPT
            DLTUB = ZERO
         END IF
 *
         IF( ORGATI ) THEN
            DO 130 J = 1, N
               DELTA( J ) = ( D( J )-D( I ) ) - TAU
  130       CONTINUE
         ELSE
            DO 140 J = 1, N
               DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
  140       CONTINUE
         END IF
         IF( ORGATI ) THEN
            II = I
         ELSE
            II = I + 1
         END IF
         IIM1 = II - 1
         IIP1 = II + 1
 *
 *        Evaluate PSI and the derivative DPSI
 *
         DPSI = ZERO
         PSI = ZERO
         ERRETM = ZERO
         DO 150 J = 1, IIM1
            TEMP = Z( J ) / DELTA( J )
            PSI = PSI + Z( J )*TEMP
            DPSI = DPSI + TEMP*TEMP
            ERRETM = ERRETM + PSI
  150    CONTINUE
         ERRETM = ABS( ERRETM )
 *
 *        Evaluate PHI and the derivative DPHI
 *
         DPHI = ZERO
         PHI = ZERO
         DO 160 J = N, IIP1, -1
            TEMP = Z( J ) / DELTA( J )
            PHI = PHI + Z( J )*TEMP
            DPHI = DPHI + TEMP*TEMP
            ERRETM = ERRETM + PHI
  160    CONTINUE
 *
         W = RHOINV + PHI + PSI
 *
 *        W is the value of the secular function with
 *        its ii-th element removed.
 *
         SWTCH3 = .FALSE.
         IF( ORGATI ) THEN
            IF( W.LT.ZERO )
     $         SWTCH3 = .TRUE.
         ELSE
            IF( W.GT.ZERO )
     $         SWTCH3 = .TRUE.
         END IF
         IF( II.EQ.1 .OR. II.EQ.N )
     $      SWTCH3 = .FALSE.
 *
         TEMP = Z( II ) / DELTA( II )
         DW = DPSI + DPHI + TEMP*TEMP
         TEMP = Z( II )*TEMP
         W = W + TEMP
         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
 *
 *        Test for convergence
 *
         IF( ABS( W ).LE.EPS*ERRETM ) THEN
            IF( ORGATI ) THEN
               DLAM = D( I ) + TAU
            ELSE
               DLAM = D( IP1 ) + TAU
            END IF
            GO TO 250
         END IF
 *
         IF( W.LE.ZERO ) THEN
            DLTLB = MAX( DLTLB, TAU )
         ELSE
            DLTUB = MIN( DLTUB, TAU )
         END IF
 *
 *        Calculate the new step
 *
         NITER = NITER + 1
         IF( .NOT.SWTCH3 ) THEN
            IF( ORGATI ) THEN
               C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
     $             ( Z( I ) / DELTA( I ) )**2
            ELSE
               C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
     $             ( Z( IP1 ) / DELTA( IP1 ) )**2
            END IF
            A = ( DELTA( I )+DELTA( IP1 ) )*W -
     $          DELTA( I )*DELTA( IP1 )*DW
            B = DELTA( I )*DELTA( IP1 )*W
            IF( C.EQ.ZERO ) THEN
               IF( A.EQ.ZERO ) THEN
                  IF( ORGATI ) THEN
                     A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
     $                   ( DPSI+DPHI )
                  ELSE
                     A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
     $                   ( DPSI+DPHI )
                  END IF
               END IF
               ETA = B / A
            ELSE IF( A.LE.ZERO ) THEN
               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
            ELSE
               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
            END IF
         ELSE
 *
 *           Interpolation using THREE most relevant poles
 *
            TEMP = RHOINV + PSI + PHI
            IF( ORGATI ) THEN
               TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
               TEMP1 = TEMP1*TEMP1
               C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
     $             ( D( IIM1 )-D( IIP1 ) )*TEMP1
               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
               ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
     $                   ( ( DPSI-TEMP1 )+DPHI )
            ELSE
               TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
               TEMP1 = TEMP1*TEMP1
               C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
     $             ( D( IIP1 )-D( IIM1 ) )*TEMP1
               ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
     $                   ( DPSI+( DPHI-TEMP1 ) )
               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
            END IF
            ZZ( 2 ) = Z( II )*Z( II )
            CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
     $                   INFO )
            IF( INFO.NE.0 )
     $         GO TO 250
         END IF
 *
 *        Note, eta should be positive if w is negative, and
 *        eta should be negative otherwise. However,
 *        if for some reason caused by roundoff, eta*w > 0,
 *        we simply use one Newton step instead. This way
 *        will guarantee eta*w < 0.
 *
         IF( W*ETA.GE.ZERO )
     $      ETA = -W / DW
         TEMP = TAU + ETA
         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
            IF( W.LT.ZERO ) THEN
               ETA = ( DLTUB-TAU ) / TWO
            ELSE
               ETA = ( DLTLB-TAU ) / TWO
            END IF
         END IF
 *
         PREW = W
 *
         DO 180 J = 1, N
            DELTA( J ) = DELTA( J ) - ETA
  180    CONTINUE
 *
 *        Evaluate PSI and the derivative DPSI
 *
         DPSI = ZERO
         PSI = ZERO
         ERRETM = ZERO
         DO 190 J = 1, IIM1
            TEMP = Z( J ) / DELTA( J )
            PSI = PSI + Z( J )*TEMP
            DPSI = DPSI + TEMP*TEMP
            ERRETM = ERRETM + PSI
  190    CONTINUE
         ERRETM = ABS( ERRETM )
 *
 *        Evaluate PHI and the derivative DPHI
 *
         DPHI = ZERO
         PHI = ZERO
         DO 200 J = N, IIP1, -1
            TEMP = Z( J ) / DELTA( J )
            PHI = PHI + Z( J )*TEMP
            DPHI = DPHI + TEMP*TEMP
            ERRETM = ERRETM + PHI
  200    CONTINUE
 *
         TEMP = Z( II ) / DELTA( II )
         DW = DPSI + DPHI + TEMP*TEMP
         TEMP = Z( II )*TEMP
         W = RHOINV + PHI + PSI + TEMP
         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
     $            THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
 *
         SWTCH = .FALSE.
         IF( ORGATI ) THEN
            IF( -W.GT.ABS( PREW ) / TEN )
     $         SWTCH = .TRUE.
         ELSE
            IF( W.GT.ABS( PREW ) / TEN )
     $         SWTCH = .TRUE.
         END IF
 *
         TAU = TAU + ETA
 *
 *        Main loop to update the values of the array   DELTA
 *
         ITER = NITER + 1
 *
         DO 240 NITER = ITER, MAXIT
 *
 *           Test for convergence
 *
            IF( ABS( W ).LE.EPS*ERRETM ) THEN
               IF( ORGATI ) THEN
                  DLAM = D( I ) + TAU
               ELSE
                  DLAM = D( IP1 ) + TAU
               END IF
               GO TO 250
            END IF
 *
            IF( W.LE.ZERO ) THEN
               DLTLB = MAX( DLTLB, TAU )
            ELSE
               DLTUB = MIN( DLTUB, TAU )
            END IF
 *
 *           Calculate the new step
 *
            IF( .NOT.SWTCH3 ) THEN
               IF( .NOT.SWTCH ) THEN
                  IF( ORGATI ) THEN
                     C = W - DELTA( IP1 )*DW -
     $                   ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
                  ELSE
                     C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
     $                   ( Z( IP1 ) / DELTA( IP1 ) )**2
                  END IF
               ELSE
                  TEMP = Z( II ) / DELTA( II )
                  IF( ORGATI ) THEN
                     DPSI = DPSI + TEMP*TEMP
                  ELSE
                     DPHI = DPHI + TEMP*TEMP
                  END IF
                  C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
               END IF
               A = ( DELTA( I )+DELTA( IP1 ) )*W -
     $             DELTA( I )*DELTA( IP1 )*DW
               B = DELTA( I )*DELTA( IP1 )*W
               IF( C.EQ.ZERO ) THEN
                  IF( A.EQ.ZERO ) THEN
                     IF( .NOT.SWTCH ) THEN
                        IF( ORGATI ) THEN
                           A = Z( I )*Z( I ) + DELTA( IP1 )*
     $                         DELTA( IP1 )*( DPSI+DPHI )
                        ELSE
                           A = Z( IP1 )*Z( IP1 ) +
     $                         DELTA( I )*DELTA( I )*( DPSI+DPHI )
                        END IF
                     ELSE
                        A = DELTA( I )*DELTA( I )*DPSI +
     $                      DELTA( IP1 )*DELTA( IP1 )*DPHI
                     END IF
                  END IF
                  ETA = B / A
               ELSE IF( A.LE.ZERO ) THEN
                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
               ELSE
                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
               END IF
            ELSE
 *
 *              Interpolation using THREE most relevant poles
 *
               TEMP = RHOINV + PSI + PHI
               IF( SWTCH ) THEN
                  C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
                  ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
                  ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
               ELSE
                  IF( ORGATI ) THEN
                     TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
                     TEMP1 = TEMP1*TEMP1
                     C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
     $                   ( D( IIM1 )-D( IIP1 ) )*TEMP1
                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
                     ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
     $                         ( ( DPSI-TEMP1 )+DPHI )
                  ELSE
                     TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
                     TEMP1 = TEMP1*TEMP1
                     C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
     $                   ( D( IIP1 )-D( IIM1 ) )*TEMP1
                     ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
     $                         ( DPSI+( DPHI-TEMP1 ) )
                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
                  END IF
               END IF
               CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
     $                      INFO )
               IF( INFO.NE.0 )
     $            GO TO 250
            END IF
 *
 *           Note, eta should be positive if w is negative, and
 *           eta should be negative otherwise. However,
 *           if for some reason caused by roundoff, eta*w > 0,
 *           we simply use one Newton step instead. This way
 *           will guarantee eta*w < 0.
 *
            IF( W*ETA.GE.ZERO )
     $         ETA = -W / DW
            TEMP = TAU + ETA
            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
               IF( W.LT.ZERO ) THEN
                  ETA = ( DLTUB-TAU ) / TWO
               ELSE
                  ETA = ( DLTLB-TAU ) / TWO
               END IF
            END IF
 *
            DO 210 J = 1, N
               DELTA( J ) = DELTA( J ) - ETA
  210       CONTINUE
 *
            TAU = TAU + ETA
            PREW = W
 *
 *           Evaluate PSI and the derivative DPSI
 *
            DPSI = ZERO
            PSI = ZERO
            ERRETM = ZERO
            DO 220 J = 1, IIM1
               TEMP = Z( J ) / DELTA( J )
               PSI = PSI + Z( J )*TEMP
               DPSI = DPSI + TEMP*TEMP
               ERRETM = ERRETM + PSI
  220       CONTINUE
            ERRETM = ABS( ERRETM )
 *
 *           Evaluate PHI and the derivative DPHI
 *
            DPHI = ZERO
            PHI = ZERO
            DO 230 J = N, IIP1, -1
               TEMP = Z( J ) / DELTA( J )
               PHI = PHI + Z( J )*TEMP
               DPHI = DPHI + TEMP*TEMP
               ERRETM = ERRETM + PHI
  230       CONTINUE
 *
            TEMP = Z( II ) / DELTA( II )
            DW = DPSI + DPHI + TEMP*TEMP
            TEMP = Z( II )*TEMP
            W = RHOINV + PHI + PSI + TEMP
            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
     $         SWTCH = .NOT.SWTCH
 *
  240    CONTINUE
 *
 *        Return with INFO = 1, NITER = MAXIT and not converged
 *
         INFO = 1
         IF( ORGATI ) THEN
            DLAM = D( I ) + TAU
         ELSE
            DLAM = D( IP1 ) + TAU
         END IF
 *
      END IF
 *
  250 CONTINUE
 *
      RETURN
 *
 *     End of DLAED4
 *
      END
--- a/lib/linalg/dlaed5.cpp
+++ b/lib/linalg/dlaed5.cpp
@ -0,0 +1,58 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho,
            doublereal *dlam)
 {
    doublereal d__1;
    double sqrt(doublereal);
    doublereal b, c__, w, del, tau, temp;
    --delta;
    --z__;
    --d__;
    del = d__[2] - d__[1];
    if (*i__ == 1) {
        w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.;
        if (w > 0.) {
            b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
            c__ = *rho * z__[1] * z__[1] * del;
            tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
            *dlam = d__[1] + tau;
            delta[1] = -z__[1] / tau;
            delta[2] = z__[2] / (del - tau);
        } else {
            b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
            c__ = *rho * z__[2] * z__[2] * del;
            if (b > 0.) {
                tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
            } else {
                tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
            }
            *dlam = d__[2] + tau;
            delta[1] = -z__[1] / (del + tau);
            delta[2] = -z__[2] / tau;
        }
        temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
        delta[1] /= temp;
        delta[2] /= temp;
    } else {
        b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
        c__ = *rho * z__[2] * z__[2] * del;
        if (b > 0.) {
            tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
        } else {
            tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
        }
        *dlam = d__[2] + tau;
        delta[1] = -z__[1] / (del + tau);
        delta[2] = -z__[2] / tau;
        temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
        delta[1] /= temp;
        delta[2] /= temp;
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed5.f
+++ b/lib/linalg/dlaed5.f
@ -1,186 +0,0 @@
 *> \brief \b DLAED5 used by DSTEDC. Solves the 2-by-2 secular equation.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED5 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            I
 *       DOUBLE PRECISION   DLAM, RHO
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> This subroutine computes the I-th eigenvalue of a symmetric rank-one
 *> modification of a 2-by-2 diagonal matrix
 *>
 *>            diag( D )  +  RHO * Z * transpose(Z) .
 *>
 *> The diagonal elements in the array D are assumed to satisfy
 *>
 *>            D(i) < D(j)  for  i < j .
 *>
 *> We also assume RHO > 0 and that the Euclidean norm of the vector
 *> Z is one.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] I
 *> \verbatim
 *>          I is INTEGER
 *>         The index of the eigenvalue to be computed.  I = 1 or I = 2.
 *> \endverbatim
 *>
 *> \param[in] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (2)
 *>         The original eigenvalues.  We assume D(1) < D(2).
 *> \endverbatim
 *>
 *> \param[in] Z
 *> \verbatim
 *>          Z is DOUBLE PRECISION array, dimension (2)
 *>         The components of the updating vector.
 *> \endverbatim
 *>
 *> \param[out] DELTA
 *> \verbatim
 *>          DELTA is DOUBLE PRECISION array, dimension (2)
 *>         The vector DELTA contains the information necessary
 *>         to construct the eigenvectors.
 *> \endverbatim
 *>
 *> \param[in] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>         The scalar in the symmetric updating formula.
 *> \endverbatim
 *>
 *> \param[out] DLAM
 *> \verbatim
 *>          DLAM is DOUBLE PRECISION
 *>         The computed lambda_I, the I-th updated eigenvalue.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *>     Ren-Cang Li, Computer Science Division, University of California
 *>     at Berkeley, USA
 *>
 *  =====================================================================
      SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            I
      DOUBLE PRECISION   DLAM, RHO
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE, TWO, FOUR
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
     $                   FOUR = 4.0D0 )
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION   B, C, DEL, TAU, TEMP, W
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, SQRT
 *     ..
 *     .. Executable Statements ..
 *
      DEL = D( 2 ) - D( 1 )
      IF( I.EQ.1 ) THEN
         W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
         IF( W.GT.ZERO ) THEN
            B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
            C = RHO*Z( 1 )*Z( 1 )*DEL
 *
 *           B > ZERO, always
 *
            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
            DLAM = D( 1 ) + TAU
            DELTA( 1 ) = -Z( 1 ) / TAU
            DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
         ELSE
            B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
            C = RHO*Z( 2 )*Z( 2 )*DEL
            IF( B.GT.ZERO ) THEN
               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
            ELSE
               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
            END IF
            DLAM = D( 2 ) + TAU
            DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
            DELTA( 2 ) = -Z( 2 ) / TAU
         END IF
         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
         DELTA( 1 ) = DELTA( 1 ) / TEMP
         DELTA( 2 ) = DELTA( 2 ) / TEMP
      ELSE
 *
 *     Now I=2
 *
         B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
         C = RHO*Z( 2 )*Z( 2 )*DEL
         IF( B.GT.ZERO ) THEN
            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
         ELSE
            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
         END IF
         DLAM = D( 2 ) + TAU
         DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
         DELTA( 2 ) = -Z( 2 ) / TAU
         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
         DELTA( 1 ) = DELTA( 1 ) / TEMP
         DELTA( 2 ) = DELTA( 2 ) / TEMP
      END IF
      RETURN
 *
 *     End of DLAED5
 *
      END
--- a/lib/linalg/dlaed6.cpp
+++ b/lib/linalg/dlaed6.cpp
@ -0,0 +1,209 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 int dlaed6_(integer *kniter, logical *orgati, doublereal *rho, doublereal *d__, doublereal *z__,
            doublereal *finit, doublereal *tau, integer *info)
 {
    integer i__1;
    doublereal d__1, d__2, d__3, d__4;
    double sqrt(doublereal), log(doublereal), pow_lmp_di(doublereal *, integer *);
    doublereal a, b, c__, f;
    integer i__;
    doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
    integer iter;
    doublereal temp, temp1, temp2, temp3, temp4;
    logical scale;
    integer niter;
    doublereal small1, small2, sminv1, sminv2;
    extern doublereal dlamch_(char *, ftnlen);
    doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
    --z__;
    --d__;
    *info = 0;
    if (*orgati) {
        lbd = d__[2];
        ubd = d__[3];
    } else {
        lbd = d__[1];
        ubd = d__[2];
    }
    if (*finit < 0.) {
        lbd = 0.;
    } else {
        ubd = 0.;
    }
    niter = 1;
    *tau = 0.;
    if (*kniter == 2) {
        if (*orgati) {
            temp = (d__[3] - d__[2]) / 2.;
            c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
            a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
            b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
        } else {
            temp = (d__[1] - d__[2]) / 2.;
            c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
            a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
            b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
        }
        d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
        temp = max(d__1, d__2);
        a /= temp;
        b /= temp;
        c__ /= temp;
        if (c__ == 0.) {
            *tau = b / a;
        } else if (a <= 0.) {
            *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
        } else {
            *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
        }
        if (*tau < lbd || *tau > ubd) {
            *tau = (lbd + ubd) / 2.;
        }
        if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
            *tau = 0.;
        } else {
            temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) +
                   *tau * z__[2] / (d__[2] * (d__[2] - *tau)) +
                   *tau * z__[3] / (d__[3] * (d__[3] - *tau));
            if (temp <= 0.) {
                lbd = *tau;
            } else {
                ubd = *tau;
            }
            if (abs(*finit) <= abs(temp)) {
                *tau = 0.;
            }
        }
    }
    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
    base = dlamch_((char *)"Base", (ftnlen)4);
    i__1 = (integer)(log(dlamch_((char *)"SafMin", (ftnlen)6)) / log(base) / 3.);
    small1 = pow_lmp_di(&base, &i__1);
    sminv1 = 1. / small1;
    small2 = small1 * small1;
    sminv2 = sminv1 * sminv1;
    if (*orgati) {
        d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *tau, abs(d__2));
        temp = min(d__3, d__4);
    } else {
        d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *tau, abs(d__2));
        temp = min(d__3, d__4);
    }
    scale = FALSE_;
    if (temp <= small1) {
        scale = TRUE_;
        if (temp <= small2) {
            sclfac = sminv2;
            sclinv = small2;
        } else {
            sclfac = sminv1;
            sclinv = small1;
        }
        for (i__ = 1; i__ <= 3; ++i__) {
            dscale[i__ - 1] = d__[i__] * sclfac;
            zscale[i__ - 1] = z__[i__] * sclfac;
        }
        *tau *= sclfac;
        lbd *= sclfac;
        ubd *= sclfac;
    } else {
        for (i__ = 1; i__ <= 3; ++i__) {
            dscale[i__ - 1] = d__[i__];
            zscale[i__ - 1] = z__[i__];
        }
    }
    fc = 0.;
    df = 0.;
    ddf = 0.;
    for (i__ = 1; i__ <= 3; ++i__) {
        temp = 1. / (dscale[i__ - 1] - *tau);
        temp1 = zscale[i__ - 1] * temp;
        temp2 = temp1 * temp;
        temp3 = temp2 * temp;
        fc += temp1 / dscale[i__ - 1];
        df += temp2;
        ddf += temp3;
    }
    f = *finit + *tau * fc;
    if (abs(f) <= 0.) {
        goto L60;
    }
    if (f <= 0.) {
        lbd = *tau;
    } else {
        ubd = *tau;
    }
    iter = niter + 1;
    for (niter = iter; niter <= 40; ++niter) {
        if (*orgati) {
            temp1 = dscale[1] - *tau;
            temp2 = dscale[2] - *tau;
        } else {
            temp1 = dscale[0] - *tau;
            temp2 = dscale[1] - *tau;
        }
        a = (temp1 + temp2) * f - temp1 * temp2 * df;
        b = temp1 * temp2 * f;
        c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
        d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
        temp = max(d__1, d__2);
        a /= temp;
        b /= temp;
        c__ /= temp;
        if (c__ == 0.) {
            eta = b / a;
        } else if (a <= 0.) {
            eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
        } else {
            eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
        }
        if (f * eta >= 0.) {
            eta = -f / df;
        }
        *tau += eta;
        if (*tau < lbd || *tau > ubd) {
            *tau = (lbd + ubd) / 2.;
        }
        fc = 0.;
        erretm = 0.;
        df = 0.;
        ddf = 0.;
        for (i__ = 1; i__ <= 3; ++i__) {
            if (dscale[i__ - 1] - *tau != 0.) {
                temp = 1. / (dscale[i__ - 1] - *tau);
                temp1 = zscale[i__ - 1] * temp;
                temp2 = temp1 * temp;
                temp3 = temp2 * temp;
                temp4 = temp1 / dscale[i__ - 1];
                fc += temp4;
                erretm += abs(temp4);
                df += temp2;
                ddf += temp3;
            } else {
                goto L60;
            }
        }
        f = *finit + *tau * fc;
        erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
        if (abs(f) <= eps * 4. * erretm || ubd - lbd <= eps * 4. * abs(*tau)) {
            goto L60;
        }
        if (f <= 0.) {
            lbd = *tau;
        } else {
            ubd = *tau;
        }
    }
    *info = 1;
 L60:
    if (scale) {
        *tau *= sclinv;
    }
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/lib/linalg/dlaed6.f
+++ b/lib/linalg/dlaed6.f
@ -1,407 +0,0 @@
 *> \brief \b DLAED6 used by DSTEDC. Computes one Newton step in solution of the secular equation.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAED6 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
 *
 *       .. Scalar Arguments ..
 *       LOGICAL            ORGATI
 *       INTEGER            INFO, KNITER
 *       DOUBLE PRECISION   FINIT, RHO, TAU
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( 3 ), Z( 3 )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAED6 computes the positive or negative root (closest to the origin)
 *> of
 *>                  z(1)        z(2)        z(3)
 *> f(x) =   rho + --------- + ---------- + ---------
 *>                 d(1)-x      d(2)-x      d(3)-x
 *>
 *> It is assumed that
 *>
 *>       if ORGATI = .true. the root is between d(2) and d(3);
 *>       otherwise it is between d(1) and d(2)
 *>
 *> This routine will be called by DLAED4 when necessary. In most cases,
 *> the root sought is the smallest in magnitude, though it might not be
 *> in some extremely rare situations.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] KNITER
 *> \verbatim
 *>          KNITER is INTEGER
 *>               Refer to DLAED4 for its significance.
 *> \endverbatim
 *>
 *> \param[in] ORGATI
 *> \verbatim
 *>          ORGATI is LOGICAL
 *>               If ORGATI is true, the needed root is between d(2) and
 *>               d(3); otherwise it is between d(1) and d(2).  See
 *>               DLAED4 for further details.
 *> \endverbatim
 *>
 *> \param[in] RHO
 *> \verbatim
 *>          RHO is DOUBLE PRECISION
 *>               Refer to the equation f(x) above.
 *> \endverbatim
 *>
 *> \param[in] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension (3)
 *>               D satisfies d(1) < d(2) < d(3).
 *> \endverbatim
 *>
 *> \param[in] Z
 *> \verbatim
 *>          Z is DOUBLE PRECISION array, dimension (3)
 *>               Each of the elements in z must be positive.
 *> \endverbatim
 *>
 *> \param[in] FINIT
 *> \verbatim
 *>          FINIT is DOUBLE PRECISION
 *>               The value of f at 0. It is more accurate than the one
 *>               evaluated inside this routine (if someone wants to do
 *>               so).
 *> \endverbatim
 *>
 *> \param[out] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION
 *>               The root of the equation f(x).
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>               = 0: successful exit
 *>               > 0: if INFO = 1, failure to converge
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup auxOTHERcomputational
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  10/02/03: This version has a few statements commented out for thread
 *>  safety (machine parameters are computed on each entry). SJH.
 *>
 *>  05/10/06: Modified from a new version of Ren-Cang Li, use
 *>     Gragg-Thornton-Warner cubic convergent scheme for better stability.
 *> \endverbatim
 *
 *> \par Contributors:
 *  ==================
 *>
 *>     Ren-Cang Li, Computer Science Division, University of California
 *>     at Berkeley, USA
 *>
 *  =====================================================================
      SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
 *
 *  -- LAPACK computational routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      LOGICAL            ORGATI
      INTEGER            INFO, KNITER
      DOUBLE PRECISION   FINIT, RHO, TAU
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   D( 3 ), Z( 3 )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      INTEGER            MAXIT
      PARAMETER          ( MAXIT = 40 )
      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 )
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   DSCALE( 3 ), ZSCALE( 3 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            SCALE
      INTEGER            I, ITER, NITER
      DOUBLE PRECISION   A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
     $                   FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4,
     $                   LBD, UBD
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
      INFO = 0
 *
      IF( ORGATI ) THEN
         LBD = D(2)
         UBD = D(3)
      ELSE
         LBD = D(1)
         UBD = D(2)
      END IF
      IF( FINIT .LT. ZERO )THEN
         LBD = ZERO
      ELSE
         UBD = ZERO
      END IF
 *
      NITER = 1
      TAU = ZERO
      IF( KNITER.EQ.2 ) THEN
         IF( ORGATI ) THEN
            TEMP = ( D( 3 )-D( 2 ) ) / TWO
            C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
            A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
            B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
         ELSE
            TEMP = ( D( 1 )-D( 2 ) ) / TWO
            C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
            A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
            B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
         END IF
         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
         A = A / TEMP
         B = B / TEMP
         C = C / TEMP
         IF( C.EQ.ZERO ) THEN
            TAU = B / A
         ELSE IF( A.LE.ZERO ) THEN
            TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
         ELSE
            TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
         END IF
         IF( TAU .LT. LBD .OR. TAU .GT. UBD )
     $      TAU = ( LBD+UBD )/TWO
         IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN
            TAU = ZERO
         ELSE
            TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) +
     $                     TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) +
     $                     TAU*Z(3)/( D(3)*( D( 3 )-TAU ) )
            IF( TEMP .LE. ZERO )THEN
               LBD = TAU
            ELSE
               UBD = TAU
            END IF
            IF( ABS( FINIT ).LE.ABS( TEMP ) )
     $         TAU = ZERO
         END IF
      END IF
 *
 *     get machine parameters for possible scaling to avoid overflow
 *
 *     modified by Sven: parameters SMALL1, SMINV1, SMALL2,
 *     SMINV2, EPS are not SAVEd anymore between one call to the
 *     others but recomputed at each call
 *
      EPS = DLAMCH( 'Epsilon' )
      BASE = DLAMCH( 'Base' )
      SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) /
     $         THREE ) )
      SMINV1 = ONE / SMALL1
      SMALL2 = SMALL1*SMALL1
      SMINV2 = SMINV1*SMINV1
 *
 *     Determine if scaling of inputs necessary to avoid overflow
 *     when computing 1/TEMP**3
 *
      IF( ORGATI ) THEN
         TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
      ELSE
         TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
      END IF
      SCALE = .FALSE.
      IF( TEMP.LE.SMALL1 ) THEN
         SCALE = .TRUE.
         IF( TEMP.LE.SMALL2 ) THEN
 *
 *        Scale up by power of radix nearest 1/SAFMIN**(2/3)
 *
            SCLFAC = SMINV2
            SCLINV = SMALL2
         ELSE
 *
 *        Scale up by power of radix nearest 1/SAFMIN**(1/3)
 *
            SCLFAC = SMINV1
            SCLINV = SMALL1
         END IF
 *
 *        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
 *
         DO 10 I = 1, 3
            DSCALE( I ) = D( I )*SCLFAC
            ZSCALE( I ) = Z( I )*SCLFAC
   10    CONTINUE
         TAU = TAU*SCLFAC
         LBD = LBD*SCLFAC
         UBD = UBD*SCLFAC
      ELSE
 *
 *        Copy D and Z to DSCALE and ZSCALE
 *
         DO 20 I = 1, 3
            DSCALE( I ) = D( I )
            ZSCALE( I ) = Z( I )
   20    CONTINUE
      END IF
 *
      FC = ZERO
      DF = ZERO
      DDF = ZERO
      DO 30 I = 1, 3
         TEMP = ONE / ( DSCALE( I )-TAU )
         TEMP1 = ZSCALE( I )*TEMP
         TEMP2 = TEMP1*TEMP
         TEMP3 = TEMP2*TEMP
         FC = FC + TEMP1 / DSCALE( I )
         DF = DF + TEMP2
         DDF = DDF + TEMP3
   30 CONTINUE
      F = FINIT + TAU*FC
 *
      IF( ABS( F ).LE.ZERO )
     $   GO TO 60
      IF( F .LE. ZERO )THEN
         LBD = TAU
      ELSE
         UBD = TAU
      END IF
 *
 *        Iteration begins -- Use Gragg-Thornton-Warner cubic convergent
 *                            scheme
 *
 *     It is not hard to see that
 *
 *           1) Iterations will go up monotonically
 *              if FINIT < 0;
 *
 *           2) Iterations will go down monotonically
 *              if FINIT > 0.
 *
      ITER = NITER + 1
 *
      DO 50 NITER = ITER, MAXIT
 *
         IF( ORGATI ) THEN
            TEMP1 = DSCALE( 2 ) - TAU
            TEMP2 = DSCALE( 3 ) - TAU
         ELSE
            TEMP1 = DSCALE( 1 ) - TAU
            TEMP2 = DSCALE( 2 ) - TAU
         END IF
         A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
         B = TEMP1*TEMP2*F
         C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
         A = A / TEMP
         B = B / TEMP
         C = C / TEMP
         IF( C.EQ.ZERO ) THEN
            ETA = B / A
         ELSE IF( A.LE.ZERO ) THEN
            ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
         ELSE
            ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
         END IF
         IF( F*ETA.GE.ZERO ) THEN
            ETA = -F / DF
         END IF
 *
         TAU = TAU + ETA
         IF( TAU .LT. LBD .OR. TAU .GT. UBD )
     $      TAU = ( LBD + UBD )/TWO
 *
         FC = ZERO
         ERRETM = ZERO
         DF = ZERO
         DDF = ZERO
         DO 40 I = 1, 3
            IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN
               TEMP = ONE / ( DSCALE( I )-TAU )
               TEMP1 = ZSCALE( I )*TEMP
               TEMP2 = TEMP1*TEMP
               TEMP3 = TEMP2*TEMP
               TEMP4 = TEMP1 / DSCALE( I )
               FC = FC + TEMP4
               ERRETM = ERRETM + ABS( TEMP4 )
               DF = DF + TEMP2
               DDF = DDF + TEMP3
            ELSE
               GO TO 60
            END IF
   40    CONTINUE
         F = FINIT + TAU*FC
         ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
     $            ABS( TAU )*DF
         IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR.
     $      ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) )  )
     $      GO TO 60
         IF( F .LE. ZERO )THEN
            LBD = TAU
         ELSE
            UBD = TAU
         END IF
   50 CONTINUE
      INFO = 1
   60 CONTINUE
 *
 *     Undo scaling
 *
      IF( SCALE )
     $   TAU = TAU*SCLINV
      RETURN
 *
 *     End of DLAED6
 *
      END
--- a/lib/linalg/dlaed7.cpp
+++ b/lib/linalg/dlaed7.cpp
@ -0,0 +1,131 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "lmp_f2c.h"
 static integer c__2 = 2;
 static integer c__1 = 1;
 static doublereal c_b10 = 1.;
 static doublereal c_b11 = 0.;
 static integer c_n1 = -1;
 int dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl,
            integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
            doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr,
            integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work,
            integer *iwork, integer *info)
 {
    integer q_dim1, q_offset, i__1, i__2;
    integer pow_lmp_ii(integer *, integer *);
    integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr;
    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
                      ftnlen, ftnlen);
    integer indxc, indxp;
    extern int dlaed8_(integer *, integer *, integer *, integer *, doublereal *, doublereal *,
                       integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
                       doublereal *, integer *, doublereal *, integer *, integer *, integer *,
                       doublereal *, integer *, integer *, integer *),
        dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *,
                doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *),
        dlaeda_(integer *, integer *, integer *, integer *, integer *, integer *, integer *,
                integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *,
                integer *);
    integer idlmda;
    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
        xerbla_(char *, integer *, ftnlen);
    integer coltyp;
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --qstore;
    --qptr;
    --prmptr;
    --perm;
    --givptr;
    givcol -= 3;
    givnum -= 3;
    --work;
    --iwork;
    *info = 0;
    if (*icompq < 0 || *icompq > 1) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*icompq == 1 && *qsiz < *n) {
        *info = -3;
    } else if (*ldq < max(1, *n)) {
        *info = -9;
    } else if (min(1, *n) > *cutpnt || *n < *cutpnt) {
        *info = -12;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_((char *)"DLAED7", &i__1, (ftnlen)6);
        return 0;
    }
    if (*n == 0) {
        return 0;
    }
    if (*icompq == 1) {
        ldq2 = *qsiz;
    } else {
        ldq2 = *n;
    }
    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq2 = iw + *n;
    is = iq2 + *n * ldq2;
    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;
    ptr = pow_lmp_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = *tlvls - i__;
        ptr += pow_lmp_ii(&c__2, &i__2);
    }
    curr = ptr + *curpbm;
    dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &givcol[3], &givnum[3],
            &qstore[1], &qptr[1], &work[iz], &work[iz + *n], info);
    if (*curlvl == *tlvls) {
        qptr[curr] = 1;
        prmptr[curr] = 1;
        givptr[curr] = 1;
    }
    dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, cutpnt, &work[iz],
            &work[idlmda], &work[iq2], &ldq2, &work[iw], &perm[prmptr[curr]], &givptr[curr + 1],
            &givcol[(givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp],
            &iwork[indx], info);
    prmptr[curr + 1] = prmptr[curr] + *n;
    givptr[curr + 1] += givptr[curr];
    if (k != 0) {
        dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], &work[iw],
                &qstore[qptr[curr]], &k, info);
        if (*info != 0) {
            goto L30;
        }
        if (*icompq == 1) {
            dgemm_((char *)"N", (char *)"N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[qptr[curr]], &k,
                   &c_b11, &q[q_offset], ldq, (ftnlen)1, (ftnlen)1);
        }
        i__1 = k;
        qptr[curr + 1] = qptr[curr] + i__1 * i__1;
        n1 = k;
        n2 = *n - k;
        dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
        qptr[curr + 1] = qptr[curr];
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            indxq[i__] = i__;
        }
    }
 L30:
    return 0;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/Show More
+++ b/Show More