Merge pull request #3579 from akohlmey/linalg-in-cpp

Convert linalg library from Fortran to C++
2023-01-05 21:16:06 -05:00
parent 843cc98531 57790ef35f
commit 3bf527d070
431 changed files with 31249 additions and 63712 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -440,16 +440,12 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_LATTE OR
    find_package(BLAS)
  endif()
  if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG)
-    include(CheckGeneratorSupport)
-    if(NOT CMAKE_GENERATOR_SUPPORT_FORTRAN)
-      status(FATAL_ERROR "Cannot build internal linear algebra library as CMake build tool lacks Fortran support")
-    endif()
-    enable_language(Fortran)
-    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF])
-    add_library(linalg STATIC ${LAPACK_SOURCES})
+    file(GLOB LINALG_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp)
+    add_library(linalg STATIC ${LINALG_SOURCES})
    set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE})
    set(BLAS_LIBRARIES "$<TARGET_FILE:linalg>")
    set(LAPACK_LIBRARIES "$<TARGET_FILE:linalg>")
+    target_link_libraries(lammps PRIVATE linalg)
  else()
    list(APPEND LAPACK_LIBRARIES ${BLAS_LIBRARIES})
  endif()
--- a/cmake/CMakeSettings.json
+++ b/cmake/CMakeSettings.json
@ -72,7 +72,7 @@
      "configurationType": "Debug",
      "buildRoot": "${workspaceRoot}\\build\\${name}",
      "installRoot": "${workspaceRoot}\\install\\${name}",
-      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
+      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
      "buildCommandArgs": "",
      "ctestCommandArgs": "",
      "inheritEnvironments": [ "clang_cl_x64" ],
@ -105,7 +105,7 @@
      "configurationType": "Release",
      "buildRoot": "${workspaceRoot}\\build\\${name}",
      "installRoot": "${workspaceRoot}\\install\\${name}",
-      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
+      "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
      "buildCommandArgs": "",
      "ctestCommandArgs": "-V",
      "inheritEnvironments": [ "clang_cl_x64" ],
--- a/cmake/presets/windows.cmake
+++ b/cmake/presets/windows.cmake
@ -1,6 +1,7 @@
 set(WIN_PACKAGES
  AMOEBA
  ASPHERE
+  AWPMD
  BOCS
  BODY
  BPM
@ -20,6 +21,7 @@ set(WIN_PACKAGES
  DPD-SMOOTH
  DRUDE
  EFF
+  ELECTRODE
  EXTRA-COMPUTE
  EXTRA-DUMP
  EXTRA-FIX
@ -35,6 +37,7 @@ set(WIN_PACKAGES
  MEAM
  MISC
  ML-IAP
+  ML-POD
  ML-SNAP
  MOFFF
  MOLECULE
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -1209,9 +1209,9 @@ The ATC package requires the MANYBODY package also be installed.
      .. code-block:: bash

         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler

 ----------

@ -1260,9 +1260,9 @@ AWPMD package
      .. code-block:: bash

         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler

 ----------

@ -1364,9 +1364,9 @@ This package depends on the KSPACE package.
      .. code-block:: bash

         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler

      The package itself is activated with ``make yes-KSPACE`` and
      ``make yes-ELECTRODE``
@ -1448,9 +1448,9 @@ ML-POD package
      .. code-block:: bash

         $ make lib-linalg                   # print help message
-         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+         $ make lib-linalg args="-m serial"  # build with GNU C++ compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m g++"     # build with GNU C++ compiler

      The package itself is activated with ``make yes-ML-POD``.

--- a/lib/atc/Makefile.lammps.linalg
+++ b/lib/atc/Makefile.lammps.linalg
@ -1,6 +1,6 @@
 # Settings that the LAMMPS build will import when this package library is used

 atc_SYSINC =
-atc_SYSLIB = -llinalg -lgfortran
+atc_SYSLIB = -llinalg
 atc_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

--- a/lib/awpmd/Makefile.lammps.linalg
+++ b/lib/awpmd/Makefile.lammps.linalg
@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used

 awpmd_SYSINC =
-awpmd_SYSLIB = -llinalg -lgfortran
+awpmd_SYSLIB = -llinalg
 awpmd_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
--- a/lib/awpmd/ivutils/include/erf.h
+++ b/lib/awpmd/ivutils/include/erf.h
@ -1,19 +0,0 @@
-# ifndef ERF_H
-# define ERF_H
-
-# ifdef _WIN32
-
-# ifdef __cplusplus
-extern "C" {
-# endif
-
-double erf(double x);
-double erfc(double x);
-
-# ifdef __cplusplus
-}
-# endif
-
-# endif
-
-# endif
--- a/lib/awpmd/ivutils/include/lapack_inter.h
+++ b/lib/awpmd/ivutils/include/lapack_inter.h
@ -1,53 +1,36 @@
 // Interface for LAPACK function

-# ifndef LAPACK_INTER_H
-# define LAPACK_INTER_H
+#ifndef LAPACK_INTER_H
+#define LAPACK_INTER_H

 #include <complex>
 typedef int lapack_int;
 typedef complex<float> lapack_complex_float;
 typedef complex<double> lapack_complex_double;

-#if defined(_WIN32) && !defined(__MINGW32__)
+#define DGETRF dgetrf_
+#define DGETRS dgetrs_
+#define DGETRI dgetri_
+#define ZPPTRF zpptrf_
+#define ZPPTRI zpptri_

-  //#define MKL_Complex8 lapack_complex_float
-  //#define MKL_Complex16 lapack_complex_double
-  #include "mkl.h"
-
-  inline void ZPPTRF( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ) {
-    ZPPTRF(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
-  }
-  inline void ZPPTRI( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ){
-     ZPPTRI(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
-  }
-
-#else
-
-  #define DGETRF dgetrf_
-  #define DGETRS dgetrs_
-  #define DGETRI dgetri_
-  #define ZPPTRF zpptrf_
-  #define ZPPTRI zpptri_
-
-  #ifdef __cplusplus
-  extern "C" {
-  #endif /* __cplusplus */
-  void dgetrf_( const lapack_int* m, const lapack_int* n, double* a, const lapack_int* lda,
-             lapack_int* ipiv, lapack_int* info );
-  void dgetrs_( const char* trans, const lapack_int* n, const lapack_int* nrhs,
-             const double* a, const lapack_int* lda, const lapack_int* ipiv,
-             double* b, const lapack_int* ldb, lapack_int* info );
-  void dgetri_( const lapack_int* n, double* a, const lapack_int* lda,
-             const lapack_int* ipiv, double* work, const lapack_int* lwork,
-             lapack_int* info );
-  void zpptrf_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
-             lapack_int* info );
-  void zpptri_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
-             lapack_int* info );
-  #ifdef __cplusplus
-  }
-  #endif /* __cplusplus */
-
-#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+void dgetrf_(const lapack_int *m, const lapack_int *n, double *a,
+             const lapack_int *lda, lapack_int *ipiv, lapack_int *info);
+void dgetrs_(const char *trans, const lapack_int *n, const lapack_int *nrhs,
+             const double *a, const lapack_int *lda, const lapack_int *ipiv,
+             double *b, const lapack_int *ldb, lapack_int *info);
+void dgetri_(const lapack_int *n, double *a, const lapack_int *lda,
+             const lapack_int *ipiv, double *work, const lapack_int *lwork,
+             lapack_int *info);
+void zpptrf_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
+             lapack_int *info);
+void zpptri_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
+             lapack_int *info);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

 #endif /* lapack_intER_H */
--- a/lib/awpmd/systems/interact/TCP/wpmd.h
+++ b/lib/awpmd/systems/interact/TCP/wpmd.h
@ -149,10 +149,8 @@
 # include "pairhash.h"
 # include "TCP/tcpdefs.h"
 # include "wavepacket.h"
-# include "erf.h"
 # include "cerf.h"

-
 using namespace std;

 # include "lapack_inter.h"
--- a/lib/awpmd/systems/interact/TCP/wpmd_split.cpp
+++ b/lib/awpmd/systems/interact/TCP/wpmd_split.cpp
@ -1,6 +1,4 @@
 # include "wpmd_split.h"
-//# include "erf.h"
-

 void AWPMD_split::resize(int flag){
  for(int s=0;s<2;s++){
--- a/lib/colvars/colvarbias_meta.cpp
+++ b/lib/colvars/colvarbias_meta.cpp
@ -14,7 +14,7 @@
 #include <algorithm>

 // used to set the absolute path of a replica file
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
 #include <direct.h>
 #define CHDIR ::_chdir
 #define GETCWD ::_getcwd
--- a/lib/colvars/colvarproxy.cpp
+++ b/lib/colvars/colvarproxy.cpp
@ -8,10 +8,10 @@
 // Colvars repository at GitHub.

 // Using access() to check if a file exists (until we can assume C++14/17)
-#if !defined(WIN32) || defined(__CYGWIN__)
+#if !defined(_WIN32) || defined(__CYGWIN__)
 #include <unistd.h>
 #endif
-#if defined(WIN32)
+#if defined(_WIN32)
 #include <io.h>
 #endif

@ -678,7 +678,7 @@ int colvarproxy_io::backup_file(char const *filename)
  // Simplified version of NAMD_file_exists()
  int exit_code;
  do {
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
    // We could use _access_s here, but it is probably too new
    exit_code = _access(filename, 00);
 #else
@ -708,7 +708,7 @@ int colvarproxy_io::backup_file(char const *filename)
 int colvarproxy_io::remove_file(char const *filename)
 {
  int error_code = COLVARS_OK;
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
  // Because the file may be open by other processes, rename it to filename.old
  std::string const renamed_file(std::string(filename)+".old");
  // It may still be there from an interrupted run, so remove it to be safe
@ -741,7 +741,7 @@ int colvarproxy_io::remove_file(char const *filename)
 int colvarproxy_io::rename_file(char const *filename, char const *newfilename)
 {
  int error_code = COLVARS_OK;
-#if defined(WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__CYGWIN__)
  // On straight Windows, must remove the destination before renaming it
  error_code |= remove_file(newfilename);
 #endif
--- a/lib/electrode/Makefile.lammps.linalg
+++ b/lib/electrode/Makefile.lammps.linalg
@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used

 electrode_SYSINC =
-electrode_SYSLIB = -llinalg -lgfortran
+electrode_SYSLIB = -llinalg
 electrode_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
--- a/lib/linalg/Makefile.gfortran
+++ b/lib/linalg/Makefile.gfortran
@ -6,21 +6,17 @@ SHELL = /bin/sh

 # ------ FILES ------

-SRC =	$(wildcard *.f)
-SRC1 =	$(wildcard *.F)
-
-FILES = $(SRC) $(SRC1) Makefile.* README
+SRC =	$(wildcard *.cpp)

 # ------ DEFINITIONS ------

 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
+OBJ =   $(SRC:.cpp=.o)

 # ------ SETTINGS ------

-FC =           gfortran
-FFLAGS =      -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore
-FFLAGS0 =     -O0 -fPIC -fno-second-underscore
+CXX =           g++ -std=c++11
+CCFLAGS =       -O3 -fPIC -ffast-math -fstrict-aliasing
 ARCHIVE =	ar
 AR =	ar
 ARCHFLAG =	-rcs
@ -34,20 +30,11 @@ lib: 	$(OBJ)

 # ------ COMPILE RULES ------

-%.o:%.F
-	$(FC) $(FFLAGS) -c $<
-
-%.o:%.f
-	$(FC) $(FFLAGS) -c $<
-
-dlamch.o: dlamch.f
-	$(FC) $(FFLAGS0) -c $<
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<

 # ------ CLEAN ------

 clean:
-	-rm -f *.o *.mod *~ $(LIB)
-
-tar:
-	-tar -czvf ../linalg.tar.gz $(FILES)
+	-rm -f *.o *~ $(LIB)

--- a/lib/linalg/Makefile.mpi
+++ b/lib/linalg/Makefile.mpi
@ -6,21 +6,17 @@ SHELL = /bin/sh

 # ------ FILES ------

-SRC =	$(wildcard *.f)
-SRC1 =	$(wildcard *.F)
-
-FILES = $(SRC) $(SRC1) Makefile.* README
+SRC =	$(wildcard *.cpp)

 # ------ DEFINITIONS ------

 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
+OBJ =   $(SRC:.cpp=.o)

 # ------ SETTINGS ------

-FC =          mpifort
-FFLAGS =      -O3 -fPIC
-FFLAGS0 =     -O0 -fPIC
+CC =          mpicxx
+CCFLAGS =      -O3 -fPIC
 ARCHIVE =	ar
 AR =	ar
 ARCHFLAG =	-rcs
@ -34,20 +30,11 @@ lib: 	$(OBJ)

 # ------ COMPILE RULES ------

-%.o:%.F
-	$(FC) $(FFLAGS) -c $<
-
-%.o:%.f
-	$(FC) $(FFLAGS) -c $<
-
-dlamch.o: dlamch.f
-	$(FC) $(FFLAGS0) -c $<
+%.o:%.cpp
+	$(CC) $(CCFLAGS) -c $<

 # ------ CLEAN ------

 clean:
-	-rm -f *.o *.mod *~ $(LIB)
-
-tar:
-	-tar -czvf ../linalg.tar.gz $(FILES)
+	-rm -f *.o *~ $(LIB)

--- a/lib/linalg/Makefile.serial
+++ b/lib/linalg/Makefile.serial
@ -1 +1 @@
-Makefile.gfortran
+Makefile.g++
--- a/lib/linalg/README
+++ b/lib/linalg/README
@ -1,7 +1,13 @@
 This directory has generic BLAS and LAPACK source files needed by the
 ATC, AWPMD, ELECTRODE, LATTE, and ML-POD packages (and possibly by other
 packages) in the future that can be used instead of platform or vendor
-optimized BLAS/LAPACK library.
+optimized BLAS/LAPACK library.  To simplify installation, these files
+have been translated from the Fortran versions of the BLAS and LAPACK
+references source files at https://netlib.org/lapack/ to C++ with f2c.
+The package with the tools to do the translation and the matching
+original Fortran sources are at https://github.com/lammps/linalg.
+Please note that even through the files are C++ source code the
+resulting library will follow the Fortran binary conventions.

 Note that this is an *incomplete* subset of full BLAS/LAPACK.

@ -20,7 +26,7 @@ can do it manually by following the instructions below.
 Build the library using one of the provided Makefile.* files or create
 your own, specific to your compiler and system.  For example:

-make -f Makefile.gfortran
+make -f Makefile.g++

 When you are done building this library, one file should exist in this
 directory:
--- a/lib/linalg/d_lmp_cnjg.cpp
+++ b/lib/linalg/d_lmp_cnjg.cpp
@ -0,0 +1,13 @@
+
+#include "lmp_f2c.h"
+
+extern "C" {
+
+void d_lmp_cnjg(doublecomplex *r, doublecomplex *z)
+{
+    doublereal zi = z->i;
+
+    r->r = z->r;
+    r->i = -zi;
+}
+}
--- a/lib/linalg/d_lmp_imag.cpp
+++ b/lib/linalg/d_lmp_imag.cpp
@ -0,0 +1,10 @@
+
+#include "lmp_f2c.h"
+
+extern "C" {
+
+double d_lmp_imag(doublecomplex *z)
+{
+    return (z->i);
+}
+}
--- a/lib/linalg/d_lmp_lg10.cpp
+++ b/lib/linalg/d_lmp_lg10.cpp
@ -0,0 +1,14 @@
+
+#include "lmp_f2c.h"
+#undef abs
+
+static constexpr double log10e = 0.43429448190325182765;
+
+#include <cmath>
+
+extern "C" {
+double d_lmp_lg10(doublereal *x)
+{
+    return (log10e * log(*x));
+}
+}
--- a/lib/linalg/d_lmp_sign.cpp
+++ b/lib/linalg/d_lmp_sign.cpp
@ -0,0 +1,12 @@
+
+#include "lmp_f2c.h"
+
+extern "C" {
+
+double d_lmp_sign(doublereal *a, doublereal *b)
+{
+    double x;
+    x = (*a >= 0 ? *a : -*a);
+    return (*b >= 0 ? x : -x);
+}
+}
--- a/lib/linalg/dasum.cpp
+++ b/lib/linalg/dasum.cpp
@ -0,0 +1,50 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+doublereal dasum_(integer *n, doublereal *dx, integer *incx)
+{
+    integer i__1, i__2;
+    doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
+    integer i__, m, mp1;
+    doublereal dtemp;
+    integer nincx;
+    --dx;
+    ret_val = 0.;
+    dtemp = 0.;
+    if (*n <= 0 || *incx <= 0) {
+        return ret_val;
+    }
+    if (*incx == 1) {
+        m = *n % 6;
+        if (m != 0) {
+            i__1 = m;
+            for (i__ = 1; i__ <= i__1; ++i__) {
+                dtemp += (d__1 = dx[i__], abs(d__1));
+            }
+            if (*n < 6) {
+                ret_val = dtemp;
+                return ret_val;
+            }
+        }
+        mp1 = m + 1;
+        i__1 = *n;
+        for (i__ = mp1; i__ <= i__1; i__ += 6) {
+            dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], abs(d__2)) +
+                    (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + 3], abs(d__4)) +
+                    (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = dx[i__ + 5], abs(d__6));
+        }
+    } else {
+        nincx = *n * *incx;
+        i__1 = nincx;
+        i__2 = *incx;
+        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+            dtemp += (d__1 = dx[i__], abs(d__1));
+        }
+    }
+    ret_val = dtemp;
+    return ret_val;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dasum.f
+++ b/lib/linalg/dasum.f
@ -1,131 +0,0 @@
-*> \brief \b DASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DASUM takes the sum of the absolute values.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
-*
-*  -- Reference BLAS level1 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DABS,MOD
-*     ..
-      DASUM = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DABS(DX(I))
-            END DO
-            IF (N.LT.6) THEN
-               DASUM = DTEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
-     $              DABS(DX(I+2)) + DABS(DX(I+3)) +
-     $              DABS(DX(I+4)) + DABS(DX(I+5))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            DTEMP = DTEMP + DABS(DX(I))
-         END DO
-      END IF
-      DASUM = DTEMP
-      RETURN
-*
-*     End of DASUM
-*
-      END
--- a/lib/linalg/daxpy.cpp
+++ b/lib/linalg/daxpy.cpp
@ -0,0 +1,56 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
+{
+    integer i__1;
+    integer i__, m, ix, iy, mp1;
+    --dy;
+    --dx;
+    if (*n <= 0) {
+        return 0;
+    }
+    if (*da == 0.) {
+        return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+        m = *n % 4;
+        if (m != 0) {
+            i__1 = m;
+            for (i__ = 1; i__ <= i__1; ++i__) {
+                dy[i__] += *da * dx[i__];
+            }
+        }
+        if (*n < 4) {
+            return 0;
+        }
+        mp1 = m + 1;
+        i__1 = *n;
+        for (i__ = mp1; i__ <= i__1; i__ += 4) {
+            dy[i__] += *da * dx[i__];
+            dy[i__ + 1] += *da * dx[i__ + 1];
+            dy[i__ + 2] += *da * dx[i__ + 2];
+            dy[i__ + 3] += *da * dx[i__ + 3];
+        }
+    } else {
+        ix = 1;
+        iy = 1;
+        if (*incx < 0) {
+            ix = (-(*n) + 1) * *incx + 1;
+        }
+        if (*incy < 0) {
+            iy = (-(*n) + 1) * *incy + 1;
+        }
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            dy[iy] += *da * dx[ix];
+            ix += *incx;
+            iy += *incy;
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/daxpy.f
+++ b/lib/linalg/daxpy.f
@ -1,152 +0,0 @@
-*> \brief \b DAXPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION DA
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DAXPY constant times a vector plus a vector.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in] DA
-*> \verbatim
-*>          DA is DOUBLE PRECISION
-*>           On entry, DA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*>
-*> \param[in,out] DY
-*> \verbatim
-*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of DY
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION DA
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (DA.EQ.0.0d0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,4)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DY(I) + DA*DX(I)
-            END DO
-         END IF
-         IF (N.LT.4) RETURN
-         MP1 = M + 1
-         DO I = MP1,N,4
-            DY(I) = DY(I) + DA*DX(I)
-            DY(I+1) = DY(I+1) + DA*DX(I+1)
-            DY(I+2) = DY(I+2) + DA*DX(I+2)
-            DY(I+3) = DY(I+3) + DA*DX(I+3)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-          DY(IY) = DY(IY) + DA*DX(IX)
-          IX = IX + INCX
-          IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-*
-*     End of DAXPY
-*
-      END
--- a/lib/linalg/dbdsqr.cpp
+++ b/lib/linalg/dbdsqr.cpp
@ -0,0 +1,522 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static doublereal c_b15 = -.125;
+static integer c__1 = 1;
+static doublereal c_b49 = 1.;
+static doublereal c_b72 = -1.;
+int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, doublereal *d__,
+            doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu,
+            doublereal *c__, integer *ldc, doublereal *work, integer *info, ftnlen uplo_len)
+{
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3, d__4;
+    double pow_lmp_dd(doublereal *, doublereal *), sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
+    integer iterdivn;
+    doublereal f, g, h__;
+    integer i__, j, m;
+    doublereal r__;
+    integer maxitdivn;
+    doublereal cs;
+    integer ll;
+    doublereal sn, mu;
+    integer nm1, nm12, nm13, lll;
+    doublereal eps, sll, tol, abse;
+    integer idir;
+    doublereal abss;
+    integer oldm;
+    doublereal cosl;
+    integer isub, iter;
+    doublereal unfl, sinl, cosr, smin, smax, sinr;
+    extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
+                     doublereal *),
+        dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
+        dscal_(integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    doublereal oldcs;
+    extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
+                      doublereal *, integer *, ftnlen, ftnlen, ftnlen);
+    integer oldll;
+    doublereal shift, sigmn, oldsn;
+    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
+    doublereal sminl, sigmx;
+    logical lower;
+    extern int dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *),
+        dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
+                doublereal *, doublereal *, doublereal *);
+    extern doublereal dlamch_(char *, ftnlen);
+    extern int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
+        xerbla_(char *, integer *, ftnlen);
+    doublereal sminoa, thresh;
+    logical rotate;
+    doublereal tolmul;
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+    *info = 0;
+    lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1);
+    if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lower) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*ncvt < 0) {
+        *info = -3;
+    } else if (*nru < 0) {
+        *info = -4;
+    } else if (*ncc < 0) {
+        *info = -5;
+    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1, *n)) {
+        *info = -9;
+    } else if (*ldu < max(1, *nru)) {
+        *info = -11;
+    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1, *n)) {
+        *info = -13;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DBDSQR", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    if (*n == 1) {
+        goto L160;
+    }
+    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
+    if (!rotate) {
+        dlasq1_(n, &d__[1], &e[1], &work[1], info);
+        if (*info != 2) {
+            return 0;
+        }
+        *info = 0;
+    }
+    nm1 = *n - 1;
+    nm12 = nm1 + nm1;
+    nm13 = nm12 + nm1;
+    idir = 0;
+    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
+    unfl = dlamch_((char *)"Safe minimum", (ftnlen)12);
+    if (lower) {
+        i__1 = *n - 1;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+            d__[i__] = r__;
+            e[i__] = sn * d__[i__ + 1];
+            d__[i__ + 1] = cs * d__[i__ + 1];
+            work[i__] = cs;
+            work[nm1 + i__] = sn;
+        }
+        if (*nru > 0) {
+            dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1,
+                   (ftnlen)1, (ftnlen)1);
+        }
+        if (*ncc > 0) {
+            dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc, (ftnlen)1,
+                   (ftnlen)1, (ftnlen)1);
+        }
+    }
+    d__3 = 100., d__4 = pow_lmp_dd(&eps, &c_b15);
+    d__1 = 10., d__2 = min(d__3, d__4);
+    tolmul = max(d__1, d__2);
+    tol = tolmul * eps;
+    smax = 0.;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
+        smax = max(d__2, d__3);
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
+        smax = max(d__2, d__3);
+    }
+    sminl = 0.;
+    if (tol >= 0.) {
+        sminoa = abs(d__[1]);
+        if (sminoa == 0.) {
+            goto L50;
+        }
+        mu = sminoa;
+        i__1 = *n;
+        for (i__ = 2; i__ <= i__1; ++i__) {
+            mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1], abs(d__1))));
+            sminoa = min(sminoa, mu);
+            if (sminoa == 0.) {
+                goto L50;
+            }
+        }
+    L50:
+        sminoa /= sqrt((doublereal)(*n));
+        d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6;
+        thresh = max(d__1, d__2);
+    } else {
+        d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6;
+        thresh = max(d__1, d__2);
+    }
+    maxitdivn = *n * 6;
+    iterdivn = 0;
+    iter = -1;
+    oldll = -1;
+    oldm = -1;
+    m = *n;
+L60:
+    if (m <= 1) {
+        goto L160;
+    }
+    if (iter >= *n) {
+        iter -= *n;
+        ++iterdivn;
+        if (iterdivn >= maxitdivn) {
+            goto L200;
+        }
+    }
+    if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
+        d__[m] = 0.;
+    }
+    smax = (d__1 = d__[m], abs(d__1));
+    smin = smax;
+    i__1 = m - 1;
+    for (lll = 1; lll <= i__1; ++lll) {
+        ll = m - lll;
+        abss = (d__1 = d__[ll], abs(d__1));
+        abse = (d__1 = e[ll], abs(d__1));
+        if (tol < 0. && abss <= thresh) {
+            d__[ll] = 0.;
+        }
+        if (abse <= thresh) {
+            goto L80;
+        }
+        smin = min(smin, abss);
+        d__1 = max(smax, abss);
+        smax = max(d__1, abse);
+    }
+    ll = 0;
+    goto L90;
+L80:
+    e[ll] = 0.;
+    if (ll == m - 1) {
+        --m;
+        goto L60;
+    }
+L90:
+    ++ll;
+    if (ll == m - 1) {
+        dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl);
+        d__[m - 1] = sigmx;
+        e[m - 1] = 0.;
+        d__[m] = sigmn;
+        if (*ncvt > 0) {
+            drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &cosr, &sinr);
+        }
+        if (*nru > 0) {
+            drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &c__1, &cosl, &sinl);
+        }
+        if (*ncc > 0) {
+            drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &cosl, &sinl);
+        }
+        m += -2;
+        goto L60;
+    }
+    if (ll > oldm || m < oldll) {
+        if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
+            idir = 1;
+        } else {
+            idir = 2;
+        }
+    }
+    if (idir == 1) {
+        if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(d__1)) ||
+            tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) {
+            e[m - 1] = 0.;
+            goto L60;
+        }
+        if (tol >= 0.) {
+            mu = (d__1 = d__[ll], abs(d__1));
+            sminl = mu;
+            i__1 = m - 1;
+            for (lll = ll; lll <= i__1; ++lll) {
+                if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+                    e[lll] = 0.;
+                    goto L60;
+                }
+                mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
+                sminl = min(sminl, mu);
+            }
+        }
+    } else {
+        if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)) ||
+            tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
+            e[ll] = 0.;
+            goto L60;
+        }
+        if (tol >= 0.) {
+            mu = (d__1 = d__[m], abs(d__1));
+            sminl = mu;
+            i__1 = ll;
+            for (lll = m - 1; lll >= i__1; --lll) {
+                if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+                    e[lll] = 0.;
+                    goto L60;
+                }
+                mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
+                sminl = min(sminl, mu);
+            }
+        }
+    }
+    oldll = ll;
+    oldm = m;
+    d__1 = eps, d__2 = tol * .01;
+    if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1, d__2)) {
+        shift = 0.;
+    } else {
+        if (idir == 1) {
+            sll = (d__1 = d__[ll], abs(d__1));
+            dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
+        } else {
+            sll = (d__1 = d__[m], abs(d__1));
+            dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
+        }
+        if (sll > 0.) {
+            d__1 = shift / sll;
+            if (d__1 * d__1 < eps) {
+                shift = 0.;
+            }
+        }
+    }
+    iter = iter + m - ll;
+    if (shift == 0.) {
+        if (idir == 1) {
+            cs = 1.;
+            oldcs = 1.;
+            i__1 = m - 1;
+            for (i__ = ll; i__ <= i__1; ++i__) {
+                d__1 = d__[i__] * cs;
+                dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
+                if (i__ > ll) {
+                    e[i__ - 1] = oldsn * r__;
+                }
+                d__1 = oldcs * r__;
+                d__2 = d__[i__ + 1] * sn;
+                dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+                work[i__ - ll + 1] = cs;
+                work[i__ - ll + 1 + nm1] = sn;
+                work[i__ - ll + 1 + nm12] = oldcs;
+                work[i__ - ll + 1 + nm13] = oldsn;
+            }
+            h__ = d__[m] * cs;
+            d__[m] = h__ * oldcs;
+            e[m - 1] = h__ * oldsn;
+            if (*ncvt > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*nru > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
+                       &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*ncc > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
+                       &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+                e[m - 1] = 0.;
+            }
+        } else {
+            cs = 1.;
+            oldcs = 1.;
+            i__1 = ll + 1;
+            for (i__ = m; i__ >= i__1; --i__) {
+                d__1 = d__[i__] * cs;
+                dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
+                if (i__ < m) {
+                    e[i__] = oldsn * r__;
+                }
+                d__1 = oldcs * r__;
+                d__2 = d__[i__ - 1] * sn;
+                dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+                work[i__ - ll] = cs;
+                work[i__ - ll + nm1] = -sn;
+                work[i__ - ll + nm12] = oldcs;
+                work[i__ - ll + nm13] = -oldsn;
+            }
+            h__ = d__[ll] * cs;
+            d__[ll] = h__ * oldcs;
+            e[ll] = h__ * oldsn;
+            if (*ncvt > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
+                       &vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*nru > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*ncc > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+                e[ll] = 0.;
+            }
+        }
+    } else {
+        if (idir == 1) {
+            f = ((d__1 = d__[ll], abs(d__1)) - shift) *
+                (d_lmp_sign(&c_b49, &d__[ll]) + shift / d__[ll]);
+            g = e[ll];
+            i__1 = m - 1;
+            for (i__ = ll; i__ <= i__1; ++i__) {
+                dlartg_(&f, &g, &cosr, &sinr, &r__);
+                if (i__ > ll) {
+                    e[i__ - 1] = r__;
+                }
+                f = cosr * d__[i__] + sinr * e[i__];
+                e[i__] = cosr * e[i__] - sinr * d__[i__];
+                g = sinr * d__[i__ + 1];
+                d__[i__ + 1] = cosr * d__[i__ + 1];
+                dlartg_(&f, &g, &cosl, &sinl, &r__);
+                d__[i__] = r__;
+                f = cosl * e[i__] + sinl * d__[i__ + 1];
+                d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
+                if (i__ < m - 1) {
+                    g = sinl * e[i__ + 1];
+                    e[i__ + 1] = cosl * e[i__ + 1];
+                }
+                work[i__ - ll + 1] = cosr;
+                work[i__ - ll + 1 + nm1] = sinr;
+                work[i__ - ll + 1 + nm12] = cosl;
+                work[i__ - ll + 1 + nm13] = sinl;
+            }
+            e[m - 1] = f;
+            if (*ncvt > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*nru > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
+                       &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*ncc > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
+                       &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+                e[m - 1] = 0.;
+            }
+        } else {
+            f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_lmp_sign(&c_b49, &d__[m]) + shift / d__[m]);
+            g = e[m - 1];
+            i__1 = ll + 1;
+            for (i__ = m; i__ >= i__1; --i__) {
+                dlartg_(&f, &g, &cosr, &sinr, &r__);
+                if (i__ < m) {
+                    e[i__] = r__;
+                }
+                f = cosr * d__[i__] + sinr * e[i__ - 1];
+                e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
+                g = sinr * d__[i__ - 1];
+                d__[i__ - 1] = cosr * d__[i__ - 1];
+                dlartg_(&f, &g, &cosl, &sinl, &r__);
+                d__[i__] = r__;
+                f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
+                d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
+                if (i__ > ll + 1) {
+                    g = sinl * e[i__ - 2];
+                    e[i__ - 2] = cosl * e[i__ - 2];
+                }
+                work[i__ - ll] = cosr;
+                work[i__ - ll + nm1] = -sinr;
+                work[i__ - ll + nm12] = cosl;
+                work[i__ - ll + nm13] = -sinl;
+            }
+            e[ll] = f;
+            if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+                e[ll] = 0.;
+            }
+            if (*ncvt > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
+                       &vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*nru > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+            if (*ncc > 0) {
+                i__1 = m - ll + 1;
+                dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
+                       (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            }
+        }
+    }
+    goto L60;
+L160:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        if (d__[i__] < 0.) {
+            d__[i__] = -d__[i__];
+            if (*ncvt > 0) {
+                dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
+            }
+        }
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        isub = 1;
+        smin = d__[1];
+        i__2 = *n + 1 - i__;
+        for (j = 2; j <= i__2; ++j) {
+            if (d__[j] <= smin) {
+                isub = j;
+                smin = d__[j];
+            }
+        }
+        if (isub != *n + 1 - i__) {
+            d__[isub] = d__[*n + 1 - i__];
+            d__[*n + 1 - i__] = smin;
+            if (*ncvt > 0) {
+                dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt);
+            }
+            if (*nru > 0) {
+                dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1);
+            }
+            if (*ncc > 0) {
+                dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc);
+            }
+        }
+    }
+    goto L220;
+L200:
+    *info = 0;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        if (e[i__] != 0.) {
+            ++(*info);
+        }
+    }
+L220:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dbdsqr.f
+++ b/lib/linalg/dbdsqr.f
@ -1,864 +0,0 @@
-*> \brief \b DBDSQR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DBDSQR + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
-*                          LDU, C, LDC, WORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       CHARACTER          UPLO
-*       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
-*      $                   VT( LDVT, * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DBDSQR computes the singular values and, optionally, the right and/or
-*> left singular vectors from the singular value decomposition (SVD) of
-*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
-*> zero-shift QR algorithm.  The SVD of B has the form
-*>
-*>    B = Q * S * P**T
-*>
-*> where S is the diagonal matrix of singular values, Q is an orthogonal
-*> matrix of left singular vectors, and P is an orthogonal matrix of
-*> right singular vectors.  If left singular vectors are requested, this
-*> subroutine actually returns U*Q instead of Q, and, if right singular
-*> vectors are requested, this subroutine returns P**T*VT instead of
-*> P**T, for given real input matrices U and VT.  When U and VT are the
-*> orthogonal matrices that reduce a general matrix A to bidiagonal
-*> form:  A = U*B*VT, as computed by DGEBRD, then
-*>
-*>    A = (U*Q) * S * (P**T*VT)
-*>
-*> is the SVD of A.  Optionally, the subroutine may also compute Q**T*C
-*> for a given real input matrix C.
-*>
-*> See "Computing  Small Singular Values of Bidiagonal Matrices With
-*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
-*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
-*> no. 5, pp. 873-912, Sept 1990) and
-*> "Accurate singular values and differential qd algorithms," by
-*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
-*> Department, University of California at Berkeley, July 1992
-*> for a detailed description of the algorithm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          = 'U':  B is upper bidiagonal;
-*>          = 'L':  B is lower bidiagonal.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix B.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] NCVT
-*> \verbatim
-*>          NCVT is INTEGER
-*>          The number of columns of the matrix VT. NCVT >= 0.
-*> \endverbatim
-*>
-*> \param[in] NRU
-*> \verbatim
-*>          NRU is INTEGER
-*>          The number of rows of the matrix U. NRU >= 0.
-*> \endverbatim
-*>
-*> \param[in] NCC
-*> \verbatim
-*>          NCC is INTEGER
-*>          The number of columns of the matrix C. NCC >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>          On entry, the n diagonal elements of the bidiagonal matrix B.
-*>          On exit, if INFO=0, the singular values of B in decreasing
-*>          order.
-*> \endverbatim
-*>
-*> \param[in,out] E
-*> \verbatim
-*>          E is DOUBLE PRECISION array, dimension (N-1)
-*>          On entry, the N-1 offdiagonal elements of the bidiagonal
-*>          matrix B.
-*>          On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
-*>          will contain the diagonal and superdiagonal elements of a
-*>          bidiagonal matrix orthogonally equivalent to the one given
-*>          as input.
-*> \endverbatim
-*>
-*> \param[in,out] VT
-*> \verbatim
-*>          VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
-*>          On entry, an N-by-NCVT matrix VT.
-*>          On exit, VT is overwritten by P**T * VT.
-*>          Not referenced if NCVT = 0.
-*> \endverbatim
-*>
-*> \param[in] LDVT
-*> \verbatim
-*>          LDVT is INTEGER
-*>          The leading dimension of the array VT.
-*>          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
-*> \endverbatim
-*>
-*> \param[in,out] U
-*> \verbatim
-*>          U is DOUBLE PRECISION array, dimension (LDU, N)
-*>          On entry, an NRU-by-N matrix U.
-*>          On exit, U is overwritten by U * Q.
-*>          Not referenced if NRU = 0.
-*> \endverbatim
-*>
-*> \param[in] LDU
-*> \verbatim
-*>          LDU is INTEGER
-*>          The leading dimension of the array U.  LDU >= max(1,NRU).
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array, dimension (LDC, NCC)
-*>          On entry, an N-by-NCC matrix C.
-*>          On exit, C is overwritten by Q**T * C.
-*>          Not referenced if NCC = 0.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>          The leading dimension of the array C.
-*>          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (4*(N-1))
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  If INFO = -i, the i-th argument had an illegal value
-*>          > 0:
-*>             if NCVT = NRU = NCC = 0,
-*>                = 1, a split was marked by a positive value in E
-*>                = 2, current block of Z not diagonalized after 30*N
-*>                     iterations (in inner while loop)
-*>                = 3, termination criterion of outer while loop not met
-*>                     (program created more than N unreduced blocks)
-*>             else NCVT = NRU = NCC = 0,
-*>                   the algorithm did not converge; D and E contain the
-*>                   elements of a bidiagonal matrix which is orthogonally
-*>                   similar to the input matrix B;  if INFO = i, i
-*>                   elements of E have not converged to zero.
-*> \endverbatim
-*
-*> \par Internal Parameters:
-*  =========================
-*>
-*> \verbatim
-*>  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
-*>          TOLMUL controls the convergence criterion of the QR loop.
-*>          If it is positive, TOLMUL*EPS is the desired relative
-*>             precision in the computed singular values.
-*>          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
-*>             desired absolute accuracy in the computed singular
-*>             values (corresponds to relative accuracy
-*>             abs(TOLMUL*EPS) in the largest singular value.
-*>          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
-*>             between 10 (for fast convergence) and .1/EPS
-*>             (for there to be some accuracy in the results).
-*>          Default is to lose at either one eighth or 2 of the
-*>             available decimal digits in each computed singular value
-*>             (whichever is smaller).
-*>
-*>  MAXITR  INTEGER, default = 6
-*>          MAXITR controls the maximum number of passes of the
-*>          algorithm through its inner loop. The algorithms stops
-*>          (and so fails to converge) if the number of passes
-*>          through the inner loop exceeds MAXITR*N**2.
-*>
-*> \endverbatim
-*
-*> \par Note:
-*  ===========
-*>
-*> \verbatim
-*>  Bug report from Cezary Dendek.
-*>  On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is
-*>  removed since it can overflow pretty easily (for N larger or equal
-*>  than 18,919). We instead use MAXITDIVN = MAXITR*N.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*  =====================================================================
-      SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
-     $                   LDU, C, LDC, WORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      CHARACTER          UPLO
-      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
-     $                   VT( LDVT, * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO
-      PARAMETER          ( ZERO = 0.0D0 )
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D0 )
-      DOUBLE PRECISION   NEGONE
-      PARAMETER          ( NEGONE = -1.0D0 )
-      DOUBLE PRECISION   HNDRTH
-      PARAMETER          ( HNDRTH = 0.01D0 )
-      DOUBLE PRECISION   TEN
-      PARAMETER          ( TEN = 10.0D0 )
-      DOUBLE PRECISION   HNDRD
-      PARAMETER          ( HNDRD = 100.0D0 )
-      DOUBLE PRECISION   MEIGTH
-      PARAMETER          ( MEIGTH = -0.125D0 )
-      INTEGER            MAXITR
-      PARAMETER          ( MAXITR = 6 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            LOWER, ROTATE
-      INTEGER            I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M,
-     $                   MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM
-      DOUBLE PRECISION   ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
-     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
-     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
-     $                   SN, THRESH, TOL, TOLMUL, UNFL
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME
-      DOUBLE PRECISION   DLAMCH
-      EXTERNAL           LSAME, DLAMCH
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
-     $                   DSCAL, DSWAP, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, DBLE, MAX, MIN, SIGN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      LOWER = LSAME( UPLO, 'L' )
-      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( NCVT.LT.0 ) THEN
-         INFO = -3
-      ELSE IF( NRU.LT.0 ) THEN
-         INFO = -4
-      ELSE IF( NCC.LT.0 ) THEN
-         INFO = -5
-      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
-     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
-         INFO = -9
-      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
-         INFO = -11
-      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
-     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
-         INFO = -13
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DBDSQR', -INFO )
-         RETURN
-      END IF
-      IF( N.EQ.0 )
-     $   RETURN
-      IF( N.EQ.1 )
-     $   GO TO 160
-*
-*     ROTATE is true if any singular vectors desired, false otherwise
-*
-      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
-*
-*     If no singular vectors desired, use qd algorithm
-*
-      IF( .NOT.ROTATE ) THEN
-         CALL DLASQ1( N, D, E, WORK, INFO )
-*
-*     If INFO equals 2, dqds didn't finish, try to finish
-*
-         IF( INFO .NE. 2 ) RETURN
-         INFO = 0
-      END IF
-*
-      NM1 = N - 1
-      NM12 = NM1 + NM1
-      NM13 = NM12 + NM1
-      IDIR = 0
-*
-*     Get machine constants
-*
-      EPS = DLAMCH( 'Epsilon' )
-      UNFL = DLAMCH( 'Safe minimum' )
-*
-*     If matrix lower bidiagonal, rotate to be upper bidiagonal
-*     by applying Givens rotations on the left
-*
-      IF( LOWER ) THEN
-         DO 10 I = 1, N - 1
-            CALL DLARTG( D( I ), E( I ), CS, SN, R )
-            D( I ) = R
-            E( I ) = SN*D( I+1 )
-            D( I+1 ) = CS*D( I+1 )
-            WORK( I ) = CS
-            WORK( NM1+I ) = SN
-   10    CONTINUE
-*
-*        Update singular vectors if desired
-*
-         IF( NRU.GT.0 )
-     $      CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
-     $                  LDU )
-         IF( NCC.GT.0 )
-     $      CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
-     $                  LDC )
-      END IF
-*
-*     Compute singular values to relative accuracy TOL
-*     (By setting TOL to be negative, algorithm will compute
-*     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
-*
-      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
-      TOL = TOLMUL*EPS
-*
-*     Compute approximate maximum, minimum singular values
-*
-      SMAX = ZERO
-      DO 20 I = 1, N
-         SMAX = MAX( SMAX, ABS( D( I ) ) )
-   20 CONTINUE
-      DO 30 I = 1, N - 1
-         SMAX = MAX( SMAX, ABS( E( I ) ) )
-   30 CONTINUE
-      SMINL = ZERO
-      IF( TOL.GE.ZERO ) THEN
-*
-*        Relative accuracy desired
-*
-         SMINOA = ABS( D( 1 ) )
-         IF( SMINOA.EQ.ZERO )
-     $      GO TO 50
-         MU = SMINOA
-         DO 40 I = 2, N
-            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
-            SMINOA = MIN( SMINOA, MU )
-            IF( SMINOA.EQ.ZERO )
-     $         GO TO 50
-   40    CONTINUE
-   50    CONTINUE
-         SMINOA = SMINOA / SQRT( DBLE( N ) )
-         THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) )
-      ELSE
-*
-*        Absolute accuracy desired
-*
-         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) )
-      END IF
-*
-*     Prepare for main iteration loop for the singular values
-*     (MAXIT is the maximum number of passes through the inner
-*     loop permitted before nonconvergence signalled.)
-*
-      MAXITDIVN = MAXITR*N
-      ITERDIVN = 0
-      ITER = -1
-      OLDLL = -1
-      OLDM = -1
-*
-*     M points to last element of unconverged part of matrix
-*
-      M = N
-*
-*     Begin main iteration loop
-*
-   60 CONTINUE
-*
-*     Check for convergence or exceeding iteration count
-*
-      IF( M.LE.1 )
-     $   GO TO 160
-*
-      IF( ITER.GE.N ) THEN
-         ITER = ITER - N
-         ITERDIVN = ITERDIVN + 1
-         IF( ITERDIVN.GE.MAXITDIVN )
-     $      GO TO 200
-      END IF
-*
-*     Find diagonal block of matrix to work on
-*
-      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
-     $   D( M ) = ZERO
-      SMAX = ABS( D( M ) )
-      SMIN = SMAX
-      DO 70 LLL = 1, M - 1
-         LL = M - LLL
-         ABSS = ABS( D( LL ) )
-         ABSE = ABS( E( LL ) )
-         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
-     $      D( LL ) = ZERO
-         IF( ABSE.LE.THRESH )
-     $      GO TO 80
-         SMIN = MIN( SMIN, ABSS )
-         SMAX = MAX( SMAX, ABSS, ABSE )
-   70 CONTINUE
-      LL = 0
-      GO TO 90
-   80 CONTINUE
-      E( LL ) = ZERO
-*
-*     Matrix splits since E(LL) = 0
-*
-      IF( LL.EQ.M-1 ) THEN
-*
-*        Convergence of bottom singular value, return to top of loop
-*
-         M = M - 1
-         GO TO 60
-      END IF
-   90 CONTINUE
-      LL = LL + 1
-*
-*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
-*
-      IF( LL.EQ.M-1 ) THEN
-*
-*        2 by 2 block, handle separately
-*
-         CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
-     $                COSR, SINL, COSL )
-         D( M-1 ) = SIGMX
-         E( M-1 ) = ZERO
-         D( M ) = SIGMN
-*
-*        Compute singular vectors, if desired
-*
-         IF( NCVT.GT.0 )
-     $      CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
-     $                 SINR )
-         IF( NRU.GT.0 )
-     $      CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
-         IF( NCC.GT.0 )
-     $      CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
-     $                 SINL )
-         M = M - 2
-         GO TO 60
-      END IF
-*
-*     If working on new submatrix, choose shift direction
-*     (from larger end diagonal element towards smaller)
-*
-      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
-         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
-*
-*           Chase bulge from top (big end) to bottom (small end)
-*
-            IDIR = 1
-         ELSE
-*
-*           Chase bulge from bottom (big end) to top (small end)
-*
-            IDIR = 2
-         END IF
-      END IF
-*
-*     Apply convergence tests
-*
-      IF( IDIR.EQ.1 ) THEN
-*
-*        Run convergence test in forward direction
-*        First apply standard test to bottom of matrix
-*
-         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
-     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
-            E( M-1 ) = ZERO
-            GO TO 60
-         END IF
-*
-         IF( TOL.GE.ZERO ) THEN
-*
-*           If relative accuracy desired,
-*           apply convergence criterion forward
-*
-            MU = ABS( D( LL ) )
-            SMINL = MU
-            DO 100 LLL = LL, M - 1
-               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
-                  E( LLL ) = ZERO
-                  GO TO 60
-               END IF
-               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
-               SMINL = MIN( SMINL, MU )
-  100       CONTINUE
-         END IF
-*
-      ELSE
-*
-*        Run convergence test in backward direction
-*        First apply standard test to top of matrix
-*
-         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
-     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
-            E( LL ) = ZERO
-            GO TO 60
-         END IF
-*
-         IF( TOL.GE.ZERO ) THEN
-*
-*           If relative accuracy desired,
-*           apply convergence criterion backward
-*
-            MU = ABS( D( M ) )
-            SMINL = MU
-            DO 110 LLL = M - 1, LL, -1
-               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
-                  E( LLL ) = ZERO
-                  GO TO 60
-               END IF
-               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
-               SMINL = MIN( SMINL, MU )
-  110       CONTINUE
-         END IF
-      END IF
-      OLDLL = LL
-      OLDM = M
-*
-*     Compute shift.  First, test if shifting would ruin relative
-*     accuracy, and if so set the shift to zero.
-*
-      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
-     $    MAX( EPS, HNDRTH*TOL ) ) THEN
-*
-*        Use a zero shift to avoid loss of relative accuracy
-*
-         SHIFT = ZERO
-      ELSE
-*
-*        Compute the shift from 2-by-2 block at end of matrix
-*
-         IF( IDIR.EQ.1 ) THEN
-            SLL = ABS( D( LL ) )
-            CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
-         ELSE
-            SLL = ABS( D( M ) )
-            CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
-         END IF
-*
-*        Test if shift negligible, and if so set to zero
-*
-         IF( SLL.GT.ZERO ) THEN
-            IF( ( SHIFT / SLL )**2.LT.EPS )
-     $         SHIFT = ZERO
-         END IF
-      END IF
-*
-*     Increment iteration count
-*
-      ITER = ITER + M - LL
-*
-*     If SHIFT = 0, do simplified QR iteration
-*
-      IF( SHIFT.EQ.ZERO ) THEN
-         IF( IDIR.EQ.1 ) THEN
-*
-*           Chase bulge from top to bottom
-*           Save cosines and sines for later singular vector updates
-*
-            CS = ONE
-            OLDCS = ONE
-            DO 120 I = LL, M - 1
-               CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
-               IF( I.GT.LL )
-     $            E( I-1 ) = OLDSN*R
-               CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
-               WORK( I-LL+1 ) = CS
-               WORK( I-LL+1+NM1 ) = SN
-               WORK( I-LL+1+NM12 ) = OLDCS
-               WORK( I-LL+1+NM13 ) = OLDSN
-  120       CONTINUE
-            H = D( M )*CS
-            D( M ) = H*OLDCS
-            E( M-1 ) = H*OLDSN
-*
-*           Update singular vectors
-*
-            IF( NCVT.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
-     $                     WORK( N ), VT( LL, 1 ), LDVT )
-            IF( NRU.GT.0 )
-     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
-            IF( NCC.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
-*
-*           Test convergence
-*
-            IF( ABS( E( M-1 ) ).LE.THRESH )
-     $         E( M-1 ) = ZERO
-*
-         ELSE
-*
-*           Chase bulge from bottom to top
-*           Save cosines and sines for later singular vector updates
-*
-            CS = ONE
-            OLDCS = ONE
-            DO 130 I = M, LL + 1, -1
-               CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
-               IF( I.LT.M )
-     $            E( I ) = OLDSN*R
-               CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
-               WORK( I-LL ) = CS
-               WORK( I-LL+NM1 ) = -SN
-               WORK( I-LL+NM12 ) = OLDCS
-               WORK( I-LL+NM13 ) = -OLDSN
-  130       CONTINUE
-            H = D( LL )*CS
-            D( LL ) = H*OLDCS
-            E( LL ) = H*OLDSN
-*
-*           Update singular vectors
-*
-            IF( NCVT.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
-            IF( NRU.GT.0 )
-     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
-     $                     WORK( N ), U( 1, LL ), LDU )
-            IF( NCC.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
-     $                     WORK( N ), C( LL, 1 ), LDC )
-*
-*           Test convergence
-*
-            IF( ABS( E( LL ) ).LE.THRESH )
-     $         E( LL ) = ZERO
-         END IF
-      ELSE
-*
-*        Use nonzero shift
-*
-         IF( IDIR.EQ.1 ) THEN
-*
-*           Chase bulge from top to bottom
-*           Save cosines and sines for later singular vector updates
-*
-            F = ( ABS( D( LL ) )-SHIFT )*
-     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
-            G = E( LL )
-            DO 140 I = LL, M - 1
-               CALL DLARTG( F, G, COSR, SINR, R )
-               IF( I.GT.LL )
-     $            E( I-1 ) = R
-               F = COSR*D( I ) + SINR*E( I )
-               E( I ) = COSR*E( I ) - SINR*D( I )
-               G = SINR*D( I+1 )
-               D( I+1 ) = COSR*D( I+1 )
-               CALL DLARTG( F, G, COSL, SINL, R )
-               D( I ) = R
-               F = COSL*E( I ) + SINL*D( I+1 )
-               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
-               IF( I.LT.M-1 ) THEN
-                  G = SINL*E( I+1 )
-                  E( I+1 ) = COSL*E( I+1 )
-               END IF
-               WORK( I-LL+1 ) = COSR
-               WORK( I-LL+1+NM1 ) = SINR
-               WORK( I-LL+1+NM12 ) = COSL
-               WORK( I-LL+1+NM13 ) = SINL
-  140       CONTINUE
-            E( M-1 ) = F
-*
-*           Update singular vectors
-*
-            IF( NCVT.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
-     $                     WORK( N ), VT( LL, 1 ), LDVT )
-            IF( NRU.GT.0 )
-     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
-            IF( NCC.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
-*
-*           Test convergence
-*
-            IF( ABS( E( M-1 ) ).LE.THRESH )
-     $         E( M-1 ) = ZERO
-*
-         ELSE
-*
-*           Chase bulge from bottom to top
-*           Save cosines and sines for later singular vector updates
-*
-            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
-     $          D( M ) )
-            G = E( M-1 )
-            DO 150 I = M, LL + 1, -1
-               CALL DLARTG( F, G, COSR, SINR, R )
-               IF( I.LT.M )
-     $            E( I ) = R
-               F = COSR*D( I ) + SINR*E( I-1 )
-               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
-               G = SINR*D( I-1 )
-               D( I-1 ) = COSR*D( I-1 )
-               CALL DLARTG( F, G, COSL, SINL, R )
-               D( I ) = R
-               F = COSL*E( I-1 ) + SINL*D( I-1 )
-               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
-               IF( I.GT.LL+1 ) THEN
-                  G = SINL*E( I-2 )
-                  E( I-2 ) = COSL*E( I-2 )
-               END IF
-               WORK( I-LL ) = COSR
-               WORK( I-LL+NM1 ) = -SINR
-               WORK( I-LL+NM12 ) = COSL
-               WORK( I-LL+NM13 ) = -SINL
-  150       CONTINUE
-            E( LL ) = F
-*
-*           Test convergence
-*
-            IF( ABS( E( LL ) ).LE.THRESH )
-     $         E( LL ) = ZERO
-*
-*           Update singular vectors if desired
-*
-            IF( NCVT.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
-     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
-            IF( NRU.GT.0 )
-     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
-     $                     WORK( N ), U( 1, LL ), LDU )
-            IF( NCC.GT.0 )
-     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
-     $                     WORK( N ), C( LL, 1 ), LDC )
-         END IF
-      END IF
-*
-*     QR iteration finished, go back and check convergence
-*
-      GO TO 60
-*
-*     All singular values converged, so make them positive
-*
-  160 CONTINUE
-      DO 170 I = 1, N
-         IF( D( I ).LT.ZERO ) THEN
-            D( I ) = -D( I )
-*
-*           Change sign of singular vectors, if desired
-*
-            IF( NCVT.GT.0 )
-     $         CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
-         END IF
-  170 CONTINUE
-*
-*     Sort the singular values into decreasing order (insertion sort on
-*     singular values, but only one transposition per singular vector)
-*
-      DO 190 I = 1, N - 1
-*
-*        Scan for smallest D(I)
-*
-         ISUB = 1
-         SMIN = D( 1 )
-         DO 180 J = 2, N + 1 - I
-            IF( D( J ).LE.SMIN ) THEN
-               ISUB = J
-               SMIN = D( J )
-            END IF
-  180    CONTINUE
-         IF( ISUB.NE.N+1-I ) THEN
-*
-*           Swap singular values and vectors
-*
-            D( ISUB ) = D( N+1-I )
-            D( N+1-I ) = SMIN
-            IF( NCVT.GT.0 )
-     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
-     $                     LDVT )
-            IF( NRU.GT.0 )
-     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
-            IF( NCC.GT.0 )
-     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
-         END IF
-  190 CONTINUE
-      GO TO 220
-*
-*     Maximum number of iterations exceeded, failure to converge
-*
-  200 CONTINUE
-      INFO = 0
-      DO 210 I = 1, N - 1
-         IF( E( I ).NE.ZERO )
-     $      INFO = INFO + 1
-  210 CONTINUE
-  220 CONTINUE
-      RETURN
-*
-*     End of DBDSQR
-*
-      END
--- a/lib/linalg/dcabs1.cpp
+++ b/lib/linalg/dcabs1.cpp
@ -0,0 +1,14 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+doublereal dcabs1_(doublecomplex *z__)
+{
+    doublereal ret_val, d__1, d__2;
+    double d_lmp_imag(doublecomplex *);
+    ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_lmp_imag(z__), abs(d__2));
+    return ret_val;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dcabs1.f
+++ b/lib/linalg/dcabs1.f
@ -1,66 +0,0 @@
-*> \brief \b DCABS1
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DCABS1(Z)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 Z
-*       ..
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] Z
-*> \verbatim
-*>          Z is COMPLEX*16
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level1
-*
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DCABS1(Z)
-*
-*  -- Reference BLAS level1 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 Z
-*     ..
-*     ..
-*  =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,DBLE,DIMAG
-*
-      DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
-      RETURN
-*
-*     End of DCABS1
-*
-      END
--- a/lib/linalg/dcopy.cpp
+++ b/lib/linalg/dcopy.cpp
@ -0,0 +1,56 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
+{
+    integer i__1;
+    integer i__, m, ix, iy, mp1;
+    --dy;
+    --dx;
+    if (*n <= 0) {
+        return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+        m = *n % 7;
+        if (m != 0) {
+            i__1 = m;
+            for (i__ = 1; i__ <= i__1; ++i__) {
+                dy[i__] = dx[i__];
+            }
+            if (*n < 7) {
+                return 0;
+            }
+        }
+        mp1 = m + 1;
+        i__1 = *n;
+        for (i__ = mp1; i__ <= i__1; i__ += 7) {
+            dy[i__] = dx[i__];
+            dy[i__ + 1] = dx[i__ + 1];
+            dy[i__ + 2] = dx[i__ + 2];
+            dy[i__ + 3] = dx[i__ + 3];
+            dy[i__ + 4] = dx[i__ + 4];
+            dy[i__ + 5] = dx[i__ + 5];
+            dy[i__ + 6] = dx[i__ + 6];
+        }
+    } else {
+        ix = 1;
+        iy = 1;
+        if (*incx < 0) {
+            ix = (-(*n) + 1) * *incx + 1;
+        }
+        if (*incy < 0) {
+            iy = (-(*n) + 1) * *incy + 1;
+        }
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            dy[iy] = dx[ix];
+            ix += *incx;
+            iy += *incy;
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dcopy.f
+++ b/lib/linalg/dcopy.f
@ -1,146 +0,0 @@
-*> \brief \b DCOPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to 1.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*>
-*> \param[out] DY
-*> \verbatim
-*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of DY
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,7)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DX(I)
-            END DO
-            IF (N.LT.7) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,7
-            DY(I) = DX(I)
-            DY(I+1) = DX(I+1)
-            DY(I+2) = DX(I+2)
-            DY(I+3) = DX(I+3)
-            DY(I+4) = DX(I+4)
-            DY(I+5) = DX(I+5)
-            DY(I+6) = DX(I+6)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DY(IY) = DX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-*
-*     End of DCOPY
-*
-      END
--- a/lib/linalg/ddot.cpp
+++ b/lib/linalg/ddot.cpp
@ -0,0 +1,58 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
+{
+    integer i__1;
+    doublereal ret_val;
+    integer i__, m, ix, iy, mp1;
+    doublereal dtemp;
+    --dy;
+    --dx;
+    ret_val = 0.;
+    dtemp = 0.;
+    if (*n <= 0) {
+        return ret_val;
+    }
+    if (*incx == 1 && *incy == 1) {
+        m = *n % 5;
+        if (m != 0) {
+            i__1 = m;
+            for (i__ = 1; i__ <= i__1; ++i__) {
+                dtemp += dx[i__] * dy[i__];
+            }
+            if (*n < 5) {
+                ret_val = dtemp;
+                return ret_val;
+            }
+        }
+        mp1 = m + 1;
+        i__1 = *n;
+        for (i__ = mp1; i__ <= i__1; i__ += 5) {
+            dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] +
+                    dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] +
+                    dx[i__ + 4] * dy[i__ + 4];
+        }
+    } else {
+        ix = 1;
+        iy = 1;
+        if (*incx < 0) {
+            ix = (-(*n) + 1) * *incx + 1;
+        }
+        if (*incy < 0) {
+            iy = (-(*n) + 1) * *incy + 1;
+        }
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            dtemp += dx[ix] * dy[iy];
+            ix += *incx;
+            iy += *incy;
+        }
+    }
+    ret_val = dtemp;
+    return ret_val;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/ddot.f
+++ b/lib/linalg/ddot.f
@ -1,148 +0,0 @@
-*> \brief \b DDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*>
-*> \param[in] DY
-*> \verbatim
-*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of DY
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      DDOT = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DX(I)*DY(I)
-            END DO
-            IF (N.LT.5) THEN
-               DDOT=DTEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
-     $            DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = DTEMP + DX(IX)*DY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      DDOT = DTEMP
-      RETURN
-*
-*     End of DDOT
-*
-      END
--- a/lib/linalg/dgebd2.cpp
+++ b/lib/linalg/dgebd2.cpp
@ -0,0 +1,105 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+int dgebd2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
+            doublereal *tauq, doublereal *taup, doublereal *work, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    integer i__;
+    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, ftnlen),
+        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
+        xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info < 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGEBD2", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*m >= *n) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            i__2 = *m - i__ + 1;
+            i__3 = i__ + 1;
+            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
+                    &tauq[i__]);
+            d__[i__] = a[i__ + i__ * a_dim1];
+            a[i__ + i__ * a_dim1] = 1.;
+            if (i__ < *n) {
+                i__2 = *m - i__ + 1;
+                i__3 = *n - i__;
+                dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[i__],
+                       &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
+            }
+            a[i__ + i__ * a_dim1] = d__[i__];
+            if (i__ < *n) {
+                i__2 = *n - i__;
+                i__3 = i__ + 2;
+                dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
+                        &taup[i__]);
+                e[i__] = a[i__ + (i__ + 1) * a_dim1];
+                a[i__ + (i__ + 1) * a_dim1] = 1.;
+                i__2 = *m - i__;
+                i__3 = *n - i__;
+                dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__],
+                       &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)5);
+                a[i__ + (i__ + 1) * a_dim1] = e[i__];
+            } else {
+                taup[i__] = 0.;
+            }
+        }
+    } else {
+        i__1 = *m;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            i__2 = *n - i__ + 1;
+            i__3 = i__ + 1;
+            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
+                    &taup[i__]);
+            d__[i__] = a[i__ + i__ * a_dim1];
+            a[i__ + i__ * a_dim1] = 1.;
+            if (i__ < *m) {
+                i__2 = *m - i__;
+                i__3 = *n - i__ + 1;
+                dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[i__],
+                       &a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
+            }
+            a[i__ + i__ * a_dim1] = d__[i__];
+            if (i__ < *m) {
+                i__2 = *m - i__;
+                i__3 = i__ + 2;
+                dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
+                        &tauq[i__]);
+                e[i__] = a[i__ + 1 + i__ * a_dim1];
+                a[i__ + 1 + i__ * a_dim1] = 1.;
+                i__2 = *m - i__;
+                i__3 = *n - i__;
+                dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tauq[i__],
+                       &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
+                a[i__ + 1 + i__ * a_dim1] = e[i__];
+            } else {
+                tauq[i__] = 0.;
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgebd2.f
+++ b/lib/linalg/dgebd2.f
@ -1,317 +0,0 @@
-*> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGEBD2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-*      $                   TAUQ( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEBD2 reduces a real general m by n matrix A to upper or lower
-*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
-*>
-*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows in the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns in the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the m by n general matrix to be reduced.
-*>          On exit,
-*>          if m >= n, the diagonal and the first superdiagonal are
-*>            overwritten with the upper bidiagonal matrix B; the
-*>            elements below the diagonal, with the array TAUQ, represent
-*>            the orthogonal matrix Q as a product of elementary
-*>            reflectors, and the elements above the first superdiagonal,
-*>            with the array TAUP, represent the orthogonal matrix P as
-*>            a product of elementary reflectors;
-*>          if m < n, the diagonal and the first subdiagonal are
-*>            overwritten with the lower bidiagonal matrix B; the
-*>            elements below the first subdiagonal, with the array TAUQ,
-*>            represent the orthogonal matrix Q as a product of
-*>            elementary reflectors, and the elements above the diagonal,
-*>            with the array TAUP, represent the orthogonal matrix P as
-*>            a product of elementary reflectors.
-*>          See Further Details.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The diagonal elements of the bidiagonal matrix B:
-*>          D(i) = A(i,i).
-*> \endverbatim
-*>
-*> \param[out] E
-*> \verbatim
-*>          E is DOUBLE PRECISION array, dimension (min(M,N)-1)
-*>          The off-diagonal elements of the bidiagonal matrix B:
-*>          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
-*>          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
-*> \endverbatim
-*>
-*> \param[out] TAUQ
-*> \verbatim
-*>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix Q. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] TAUP
-*> \verbatim
-*>          TAUP is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix P. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (max(M,N))
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0: successful exit.
-*>          < 0: if INFO = -i, the i-th argument had an illegal value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrices Q and P are represented as products of elementary
-*>  reflectors:
-*>
-*>  If m >= n,
-*>
-*>     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
-*>
-*>  Each H(i) and G(i) has the form:
-*>
-*>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
-*>
-*>  where tauq and taup are real scalars, and v and u are real vectors;
-*>  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
-*>  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
-*>  tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  If m < n,
-*>
-*>     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
-*>
-*>  Each H(i) and G(i) has the form:
-*>
-*>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
-*>
-*>  where tauq and taup are real scalars, and v and u are real vectors;
-*>  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
-*>  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
-*>  tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  The contents of A on exit are illustrated by the following examples:
-*>
-*>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
-*>
-*>    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
-*>    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
-*>    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
-*>    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
-*>    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
-*>    (  v1  v2  v3  v4  v5 )
-*>
-*>  where d and e denote diagonal and off-diagonal elements of B, vi
-*>  denotes an element of the vector defining H(i), and ui an element of
-*>  the vector defining G(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-     $                   TAUQ( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLARF, DLARFG, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.LT.0 ) THEN
-         CALL XERBLA( 'DGEBD2', -INFO )
-         RETURN
-      END IF
-*
-      IF( M.GE.N ) THEN
-*
-*        Reduce to upper bidiagonal form
-*
-         DO 10 I = 1, N
-*
-*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
-*
-            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
-     $                   TAUQ( I ) )
-            D( I ) = A( I, I )
-            A( I, I ) = ONE
-*
-*           Apply H(i) to A(i:m,i+1:n) from the left
-*
-            IF( I.LT.N )
-     $         CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
-     $                     A( I, I+1 ), LDA, WORK )
-            A( I, I ) = D( I )
-*
-            IF( I.LT.N ) THEN
-*
-*              Generate elementary reflector G(i) to annihilate
-*              A(i,i+2:n)
-*
-               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
-     $                      LDA, TAUP( I ) )
-               E( I ) = A( I, I+1 )
-               A( I, I+1 ) = ONE
-*
-*              Apply G(i) to A(i+1:m,i+1:n) from the right
-*
-               CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
-     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
-               A( I, I+1 ) = E( I )
-            ELSE
-               TAUP( I ) = ZERO
-            END IF
-   10    CONTINUE
-      ELSE
-*
-*        Reduce to lower bidiagonal form
-*
-         DO 20 I = 1, M
-*
-*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
-*
-            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
-     $                   TAUP( I ) )
-            D( I ) = A( I, I )
-            A( I, I ) = ONE
-*
-*           Apply G(i) to A(i+1:m,i:n) from the right
-*
-            IF( I.LT.M )
-     $         CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
-     $                     TAUP( I ), A( I+1, I ), LDA, WORK )
-            A( I, I ) = D( I )
-*
-            IF( I.LT.M ) THEN
-*
-*              Generate elementary reflector H(i) to annihilate
-*              A(i+2:m,i)
-*
-               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
-     $                      TAUQ( I ) )
-               E( I ) = A( I+1, I )
-               A( I+1, I ) = ONE
-*
-*              Apply H(i) to A(i+1:m,i+1:n) from the left
-*
-               CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
-     $                     A( I+1, I+1 ), LDA, WORK )
-               A( I+1, I ) = E( I )
-            ELSE
-               TAUQ( I ) = ZERO
-            END IF
-   20    CONTINUE
-      END IF
-      RETURN
-*
-*     End of DGEBD2
-*
-      END
--- a/lib/linalg/dgebrd.cpp
+++ b/lib/linalg/dgebrd.cpp
@ -0,0 +1,129 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static doublereal c_b21 = -1.;
+static doublereal c_b22 = 1.;
+int dgebrd_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
+            doublereal *tauq, doublereal *taup, doublereal *work, integer *lwork, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    integer i__, j, nb, nx, ws;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen);
+    integer nbmin, iinfo, minmn;
+    extern int dgebd2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       doublereal *, doublereal *, doublereal *, integer *),
+        dlabrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *,
+                doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+                integer *),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    integer ldwrkx, ldwrky, lwkopt;
+    logical lquery;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+    *info = 0;
+    i__1 = 1, i__2 = ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+    nb = max(i__1, i__2);
+    lwkopt = (*m + *n) * nb;
+    work[1] = (doublereal)lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    } else {
+        i__1 = max(1, *m);
+        if (*lwork < max(i__1, *n) && !lquery) {
+            *info = -10;
+        }
+    }
+    if (*info < 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGEBRD", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        return 0;
+    }
+    minmn = min(*m, *n);
+    if (minmn == 0) {
+        work[1] = 1.;
+        return 0;
+    }
+    ws = max(*m, *n);
+    ldwrkx = *m;
+    ldwrky = *n;
+    if (nb > 1 && nb < minmn) {
+        i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+        nx = max(i__1, i__2);
+        if (nx < minmn) {
+            ws = (*m + *n) * nb;
+            if (*lwork < ws) {
+                nbmin = ilaenv_(&c__2, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+                if (*lwork >= (*m + *n) * nbmin) {
+                    nb = *lwork / (*m + *n);
+                } else {
+                    nb = 1;
+                    nx = minmn;
+                }
+            }
+        }
+    } else {
+        nx = minmn;
+    }
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+        i__3 = *m - i__ + 1;
+        i__4 = *n - i__ + 1;
+        dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__],
+                &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky);
+        i__3 = *m - i__ - nb + 1;
+        i__4 = *n - i__ - nb + 1;
+        dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + nb + i__ * a_dim1],
+               lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &c_b22,
+               &a[i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)12, (ftnlen)9);
+        i__3 = *m - i__ - nb + 1;
+        i__4 = *n - i__ - nb + 1;
+        dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &nb, &c_b21, &work[nb + 1], &ldwrkx,
+               &a[i__ + (i__ + nb) * a_dim1], lda, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda,
+               (ftnlen)12, (ftnlen)12);
+        if (*m >= *n) {
+            i__3 = i__ + nb - 1;
+            for (j = i__; j <= i__3; ++j) {
+                a[j + j * a_dim1] = d__[j];
+                a[j + (j + 1) * a_dim1] = e[j];
+            }
+        } else {
+            i__3 = i__ + nb - 1;
+            for (j = i__; j <= i__3; ++j) {
+                a[j + j * a_dim1] = d__[j];
+                a[j + 1 + j * a_dim1] = e[j];
+            }
+        }
+    }
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__], &taup[i__],
+            &work[1], &iinfo);
+    work[1] = (doublereal)ws;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgebrd.f
+++ b/lib/linalg/dgebrd.f
@ -1,349 +0,0 @@
-*> \brief \b DGEBRD
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGEBRD + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
-*                          INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LWORK, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-*      $                   TAUQ( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEBRD reduces a general real M-by-N matrix A to upper or lower
-*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
-*>
-*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows in the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns in the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N general matrix to be reduced.
-*>          On exit,
-*>          if m >= n, the diagonal and the first superdiagonal are
-*>            overwritten with the upper bidiagonal matrix B; the
-*>            elements below the diagonal, with the array TAUQ, represent
-*>            the orthogonal matrix Q as a product of elementary
-*>            reflectors, and the elements above the first superdiagonal,
-*>            with the array TAUP, represent the orthogonal matrix P as
-*>            a product of elementary reflectors;
-*>          if m < n, the diagonal and the first subdiagonal are
-*>            overwritten with the lower bidiagonal matrix B; the
-*>            elements below the first subdiagonal, with the array TAUQ,
-*>            represent the orthogonal matrix Q as a product of
-*>            elementary reflectors, and the elements above the diagonal,
-*>            with the array TAUP, represent the orthogonal matrix P as
-*>            a product of elementary reflectors.
-*>          See Further Details.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The diagonal elements of the bidiagonal matrix B:
-*>          D(i) = A(i,i).
-*> \endverbatim
-*>
-*> \param[out] E
-*> \verbatim
-*>          E is DOUBLE PRECISION array, dimension (min(M,N)-1)
-*>          The off-diagonal elements of the bidiagonal matrix B:
-*>          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
-*>          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
-*> \endverbatim
-*>
-*> \param[out] TAUQ
-*> \verbatim
-*>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix Q. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] TAUP
-*> \verbatim
-*>          TAUP is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix P. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The length of the array WORK.  LWORK >= max(1,M,N).
-*>          For optimum performance LWORK >= (M+N)*NB, where NB
-*>          is the optimal blocksize.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrices Q and P are represented as products of elementary
-*>  reflectors:
-*>
-*>  If m >= n,
-*>
-*>     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
-*>
-*>  Each H(i) and G(i) has the form:
-*>
-*>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
-*>
-*>  where tauq and taup are real scalars, and v and u are real vectors;
-*>  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
-*>  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
-*>  tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  If m < n,
-*>
-*>     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
-*>
-*>  Each H(i) and G(i) has the form:
-*>
-*>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
-*>
-*>  where tauq and taup are real scalars, and v and u are real vectors;
-*>  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
-*>  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
-*>  tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  The contents of A on exit are illustrated by the following examples:
-*>
-*>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
-*>
-*>    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
-*>    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
-*>    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
-*>    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
-*>    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
-*>    (  v1  v2  v3  v4  v5 )
-*>
-*>  where d and e denote diagonal and off-diagonal elements of B, vi
-*>  denotes an element of the vector defining H(i), and ui an element of
-*>  the vector defining G(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
-     $                   INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LWORK, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-     $                   TAUQ( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
-     $                   NBMIN, NX, WS
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEBD2, DGEMM, DLABRD, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          DBLE, MAX, MIN
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters
-*
-      INFO = 0
-      NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
-      LWKOPT = ( M+N )*NB
-      WORK( 1 ) = DBLE( LWKOPT )
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
-         INFO = -10
-      END IF
-      IF( INFO.LT.0 ) THEN
-         CALL XERBLA( 'DGEBRD', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      MINMN = MIN( M, N )
-      IF( MINMN.EQ.0 ) THEN
-         WORK( 1 ) = 1
-         RETURN
-      END IF
-*
-      WS = MAX( M, N )
-      LDWRKX = M
-      LDWRKY = N
-*
-      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
-*
-*        Set the crossover point NX.
-*
-         NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
-*
-*        Determine when to switch from blocked to unblocked code.
-*
-         IF( NX.LT.MINMN ) THEN
-            WS = ( M+N )*NB
-            IF( LWORK.LT.WS ) THEN
-*
-*              Not enough work space for the optimal NB, consider using
-*              a smaller block size.
-*
-               NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
-               IF( LWORK.GE.( M+N )*NBMIN ) THEN
-                  NB = LWORK / ( M+N )
-               ELSE
-                  NB = 1
-                  NX = MINMN
-               END IF
-            END IF
-         END IF
-      ELSE
-         NX = MINMN
-      END IF
-*
-      DO 30 I = 1, MINMN - NX, NB
-*
-*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return
-*        the matrices X and Y which are needed to update the unreduced
-*        part of the matrix
-*
-         CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
-     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
-     $                WORK( LDWRKX*NB+1 ), LDWRKY )
-*
-*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
-*        of the form  A := A - V*Y**T - X*U**T
-*
-         CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
-     $               NB, -ONE, A( I+NB, I ), LDA,
-     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
-     $               A( I+NB, I+NB ), LDA )
-         CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
-     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
-     $               ONE, A( I+NB, I+NB ), LDA )
-*
-*        Copy diagonal and off-diagonal elements of B back into A
-*
-         IF( M.GE.N ) THEN
-            DO 10 J = I, I + NB - 1
-               A( J, J ) = D( J )
-               A( J, J+1 ) = E( J )
-   10       CONTINUE
-         ELSE
-            DO 20 J = I, I + NB - 1
-               A( J, J ) = D( J )
-               A( J+1, J ) = E( J )
-   20       CONTINUE
-         END IF
-   30 CONTINUE
-*
-*     Use unblocked code to reduce the remainder of the matrix
-*
-      CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
-     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
-      WORK( 1 ) = WS
-      RETURN
-*
-*     End of DGEBRD
-*
-      END
--- a/lib/linalg/dgecon.cpp
+++ b/lib/linalg/dgecon.cpp
@ -0,0 +1,101 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+int dgecon_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *anorm,
+            doublereal *rcond, doublereal *work, integer *iwork, integer *info, ftnlen norm_len)
+{
+    integer a_dim1, a_offset, i__1;
+    doublereal d__1;
+    doublereal sl;
+    integer ix;
+    doublereal su;
+    integer kase, kase1;
+    doublereal scale;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    integer isave[3];
+    extern int drscl_(integer *, doublereal *, doublereal *, integer *),
+        dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+                integer *);
+    extern doublereal dlamch_(char *, ftnlen);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern int xerbla_(char *, integer *, ftnlen);
+    doublereal ainvnm;
+    extern int dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *,
+                       doublereal *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
+                       ftnlen);
+    logical onenrm;
+    char normin[1];
+    doublereal smlnum;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+    --iwork;
+    *info = 0;
+    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1);
+    if (!onenrm && !lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *n)) {
+        *info = -4;
+    } else if (*anorm < 0.) {
+        *info = -5;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGECON", &i__1, (ftnlen)6);
+        return 0;
+    }
+    *rcond = 0.;
+    if (*n == 0) {
+        *rcond = 1.;
+        return 0;
+    } else if (*anorm == 0.) {
+        return 0;
+    }
+    smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12);
+    ainvnm = 0.;
+    *(unsigned char *)normin = 'N';
+    if (onenrm) {
+        kase1 = 1;
+    } else {
+        kase1 = 2;
+    }
+    kase = 0;
+L10:
+    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+        if (kase == kase1) {
+            dlatrs_((char *)"Lower", (char *)"No transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
+                    &work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)4, (ftnlen)1);
+            dlatrs_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1],
+                    &su, &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)8, (ftnlen)1);
+        } else {
+            dlatrs_((char *)"Upper", (char *)"Transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1], &su,
+                    &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)8, (ftnlen)1);
+            dlatrs_((char *)"Lower", (char *)"Transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
+                    &work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)4, (ftnlen)1);
+        }
+        scale = sl * su;
+        *(unsigned char *)normin = 'Y';
+        if (scale != 1.) {
+            ix = idamax_(n, &work[1], &c__1);
+            if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) {
+                goto L20;
+            }
+            drscl_(n, &scale, &work[1], &c__1);
+        }
+        goto L10;
+    }
+    if (ainvnm != 0.) {
+        *rcond = 1. / ainvnm / *anorm;
+    }
+L20:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgecon.f
+++ b/lib/linalg/dgecon.f
@ -1,258 +0,0 @@
-*> \brief \b DGECON
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGECON + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
-*                          INFO )
-*
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            INFO, LDA, N
-*       DOUBLE PRECISION   ANORM, RCOND
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IWORK( * )
-*       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGECON estimates the reciprocal of the condition number of a general
-*> real matrix A, in either the 1-norm or the infinity-norm, using
-*> the LU factorization computed by DGETRF.
-*>
-*> An estimate is obtained for norm(inv(A)), and the reciprocal of the
-*> condition number is computed as
-*>    RCOND = 1 / ( norm(A) * norm(inv(A)) ).
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies whether the 1-norm condition number or the
-*>          infinity-norm condition number is required:
-*>          = '1' or 'O':  1-norm;
-*>          = 'I':         Infinity-norm.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          The factors L and U from the factorization A = P*L*U
-*>          as computed by DGETRF.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in] ANORM
-*> \verbatim
-*>          ANORM is DOUBLE PRECISION
-*>          If NORM = '1' or 'O', the 1-norm of the original matrix A.
-*>          If NORM = 'I', the infinity-norm of the original matrix A.
-*> \endverbatim
-*>
-*> \param[out] RCOND
-*> \verbatim
-*>          RCOND is DOUBLE PRECISION
-*>          The reciprocal of the condition number of the matrix A,
-*>          computed as RCOND = 1/(norm(A) * norm(inv(A))).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (4*N)
-*> \endverbatim
-*>
-*> \param[out] IWORK
-*> \verbatim
-*>          IWORK is INTEGER array, dimension (N)
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
-     $                   INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            INFO, LDA, N
-      DOUBLE PRECISION   ANORM, RCOND
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IWORK( * )
-      DOUBLE PRECISION   A( LDA, * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE, ZERO
-      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            ONENRM
-      CHARACTER          NORMIN
-      INTEGER            IX, KASE, KASE1
-      DOUBLE PRECISION   AINVNM, SCALE, SL, SMLNUM, SU
-*     ..
-*     .. Local Arrays ..
-      INTEGER            ISAVE( 3 )
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME
-      INTEGER            IDAMAX
-      DOUBLE PRECISION   DLAMCH
-      EXTERNAL           LSAME, IDAMAX, DLAMCH
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLACN2, DLATRS, DRSCL, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
-      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
-         INFO = -4
-      ELSE IF( ANORM.LT.ZERO ) THEN
-         INFO = -5
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGECON', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      RCOND = ZERO
-      IF( N.EQ.0 ) THEN
-         RCOND = ONE
-         RETURN
-      ELSE IF( ANORM.EQ.ZERO ) THEN
-         RETURN
-      END IF
-*
-      SMLNUM = DLAMCH( 'Safe minimum' )
-*
-*     Estimate the norm of inv(A).
-*
-      AINVNM = ZERO
-      NORMIN = 'N'
-      IF( ONENRM ) THEN
-         KASE1 = 1
-      ELSE
-         KASE1 = 2
-      END IF
-      KASE = 0
-   10 CONTINUE
-      CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE )
-      IF( KASE.NE.0 ) THEN
-         IF( KASE.EQ.KASE1 ) THEN
-*
-*           Multiply by inv(L).
-*
-            CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
-     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
-*
-*           Multiply by inv(U).
-*
-            CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
-     $                   A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
-         ELSE
-*
-*           Multiply by inv(U**T).
-*
-            CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
-     $                   LDA, WORK, SU, WORK( 3*N+1 ), INFO )
-*
-*           Multiply by inv(L**T).
-*
-            CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
-     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
-         END IF
-*
-*        Divide X by 1/(SL*SU) if doing so will not cause overflow.
-*
-         SCALE = SL*SU
-         NORMIN = 'Y'
-         IF( SCALE.NE.ONE ) THEN
-            IX = IDAMAX( N, WORK, 1 )
-            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
-     $         GO TO 20
-            CALL DRSCL( N, SCALE, WORK, 1 )
-         END IF
-         GO TO 10
-      END IF
-*
-*     Compute the estimate of the reciprocal condition number.
-*
-      IF( AINVNM.NE.ZERO )
-     $   RCOND = ( ONE / AINVNM ) / ANORM
-*
-   20 CONTINUE
-      RETURN
-*
-*     End of DGECON
-*
-      END
--- a/lib/linalg/dgelq2.cpp
+++ b/lib/linalg/dgelq2.cpp
@ -0,0 +1,53 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dgelq2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+            integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    integer i__, k;
+    doublereal aii;
+    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, ftnlen),
+        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
+        xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGELQ2", &i__1, (ftnlen)6);
+        return 0;
+    }
+    k = min(*m, *n);
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        i__2 = *n - i__ + 1;
+        i__3 = i__ + 1;
+        dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda, &tau[i__]);
+        if (i__ < *m) {
+            aii = a[i__ + i__ * a_dim1];
+            a[i__ + i__ * a_dim1] = 1.;
+            i__2 = *m - i__;
+            i__3 = *n - i__ + 1;
+            dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__],
+                   &a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
+            a[i__ + i__ * a_dim1] = aii;
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgelq2.f
+++ b/lib/linalg/dgelq2.f
@ -1,197 +0,0 @@
-*> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGELQ2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGELQ2 computes an LQ factorization of a real m-by-n matrix A:
-*>
-*>    A = ( L 0 ) *  Q
-*>
-*> where:
-*>
-*>    Q is a n-by-n orthogonal matrix;
-*>    L is a lower-triangular m-by-m matrix;
-*>    0 is a m-by-(n-m) zero matrix, if m < n.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the m by n matrix A.
-*>          On exit, the elements on and below the diagonal of the array
-*>          contain the m by min(m,n) lower trapezoidal matrix L (L is
-*>          lower triangular if m <= n); the elements above the diagonal,
-*>          with the array TAU, represent the orthogonal matrix Q as a
-*>          product of elementary reflectors (see Further Details).
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] TAU
-*> \verbatim
-*>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors (see Further
-*>          Details).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (M)
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0: successful exit
-*>          < 0: if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrix Q is represented as a product of elementary reflectors
-*>
-*>     Q = H(k) . . . H(2) H(1), where k = min(m,n).
-*>
-*>  Each H(i) has the form
-*>
-*>     H(i) = I - tau * v * v**T
-*>
-*>  where tau is a real scalar, and v is a real vector with
-*>  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
-*>  and tau in TAU(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, K
-      DOUBLE PRECISION   AII
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLARF, DLARFG, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGELQ2', -INFO )
-         RETURN
-      END IF
-*
-      K = MIN( M, N )
-*
-      DO 10 I = 1, K
-*
-*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
-*
-         CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
-     $                TAU( I ) )
-         IF( I.LT.M ) THEN
-*
-*           Apply H(i) to A(i+1:m,i:n) from the right
-*
-            AII = A( I, I )
-            A( I, I ) = ONE
-            CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
-     $                  A( I+1, I ), LDA, WORK )
-            A( I, I ) = AII
-         END IF
-   10 CONTINUE
-      RETURN
-*
-*     End of DGELQ2
-*
-      END
--- a/lib/linalg/dgelqf.cpp
+++ b/lib/linalg/dgelqf.cpp
@ -0,0 +1,106 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+int dgelqf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+            integer *lwork, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       integer *),
+        dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
+                integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
+                integer *, ftnlen, ftnlen, ftnlen, ftnlen),
+        dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                doublereal *, integer *, ftnlen, ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+    *info = 0;
+    nb = ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = *m * nb;
+    work[1] = (doublereal)lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    } else if (*lwork < max(1, *m) && !lquery) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGELQF", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        return 0;
+    }
+    k = min(*m, *n);
+    if (k == 0) {
+        work[1] = 1.;
+        return 0;
+    }
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+        i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+        nx = max(i__1, i__2);
+        if (nx < k) {
+            ldwork = *m;
+            iws = ldwork * nb;
+            if (*lwork < iws) {
+                nb = *lwork / ldwork;
+                i__1 = 2,
+                i__2 = ilaenv_(&c__2, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+                nbmin = max(i__1, i__2);
+            }
+        }
+    }
+    if (nb >= nbmin && nb < k && nx < k) {
+        i__1 = k - nx;
+        i__2 = nb;
+        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+            i__3 = k - i__ + 1;
+            ib = min(i__3, nb);
+            i__3 = *n - i__ + 1;
+            dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
+            if (i__ + ib <= *m) {
+                i__3 = *n - i__ + 1;
+                dlarft_((char *)"Forward", (char *)"Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
+                        &work[1], &ldwork, (ftnlen)7, (ftnlen)7);
+                i__3 = *m - i__ - ib + 1;
+                i__4 = *n - i__ + 1;
+                dlarfb_((char *)"Right", (char *)"No transpose", (char *)"Forward", (char *)"Rowwise", &i__3, &i__4, &ib,
+                        &a[i__ + i__ * a_dim1], lda, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1],
+                        lda, &work[ib + 1], &ldwork, (ftnlen)5, (ftnlen)12, (ftnlen)7, (ftnlen)7);
+            }
+        }
+    } else {
+        i__ = 1;
+    }
+    if (i__ <= k) {
+        i__2 = *m - i__ + 1;
+        i__1 = *n - i__ + 1;
+        dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
+    }
+    work[1] = (doublereal)iws;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgelqf.f
+++ b/lib/linalg/dgelqf.f
@ -1,274 +0,0 @@
-*> \brief \b DGELQF
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGELQF + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LWORK, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGELQF computes an LQ factorization of a real M-by-N matrix A:
-*>
-*>    A = ( L 0 ) *  Q
-*>
-*> where:
-*>
-*>    Q is a N-by-N orthogonal matrix;
-*>    L is a lower-triangular M-by-M matrix;
-*>    0 is a M-by-(N-M) zero matrix, if M < N.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix A.
-*>          On exit, the elements on and below the diagonal of the array
-*>          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
-*>          lower triangular if m <= n); the elements above the diagonal,
-*>          with the array TAU, represent the orthogonal matrix Q as a
-*>          product of elementary reflectors (see Further Details).
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] TAU
-*> \verbatim
-*>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors (see Further
-*>          Details).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The dimension of the array WORK.  LWORK >= max(1,M).
-*>          For optimum performance LWORK >= M*NB, where NB is the
-*>          optimal blocksize.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrix Q is represented as a product of elementary reflectors
-*>
-*>     Q = H(k) . . . H(2) H(1), where k = min(m,n).
-*>
-*>  Each H(i) has the form
-*>
-*>     H(i) = I - tau * v * v**T
-*>
-*>  where tau is a real scalar, and v is a real vector with
-*>  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
-*>  and tau in TAU(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LWORK, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
-     $                   NBMIN, NX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGELQ2, DLARFB, DLARFT, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments
-*
-      INFO = 0
-      NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
-      LWKOPT = M*NB
-      WORK( 1 ) = LWKOPT
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
-         INFO = -7
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGELQF', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      K = MIN( M, N )
-      IF( K.EQ.0 ) THEN
-         WORK( 1 ) = 1
-         RETURN
-      END IF
-*
-      NBMIN = 2
-      NX = 0
-      IWS = M
-      IF( NB.GT.1 .AND. NB.LT.K ) THEN
-*
-*        Determine when to cross over from blocked to unblocked code.
-*
-         NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
-         IF( NX.LT.K ) THEN
-*
-*           Determine if workspace is large enough for blocked code.
-*
-            LDWORK = M
-            IWS = LDWORK*NB
-            IF( LWORK.LT.IWS ) THEN
-*
-*              Not enough workspace to use optimal NB:  reduce NB and
-*              determine the minimum value of NB.
-*
-               NB = LWORK / LDWORK
-               NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
-     $                 -1 ) )
-            END IF
-         END IF
-      END IF
-*
-      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
-*
-*        Use blocked code initially
-*
-         DO 10 I = 1, K - NX, NB
-            IB = MIN( K-I+1, NB )
-*
-*           Compute the LQ factorization of the current block
-*           A(i:i+ib-1,i:n)
-*
-            CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
-     $                   IINFO )
-            IF( I+IB.LE.M ) THEN
-*
-*              Form the triangular factor of the block reflector
-*              H = H(i) H(i+1) . . . H(i+ib-1)
-*
-               CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
-     $                      LDA, TAU( I ), WORK, LDWORK )
-*
-*              Apply H to A(i+ib:m,i:n) from the right
-*
-               CALL DLARFB( 'Right', 'No transpose', 'Forward',
-     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
-     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
-     $                      WORK( IB+1 ), LDWORK )
-            END IF
-   10    CONTINUE
-      ELSE
-         I = 1
-      END IF
-*
-*     Use unblocked code to factor the last or only block.
-*
-      IF( I.LE.K )
-     $   CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
-     $                IINFO )
-*
-      WORK( 1 ) = IWS
-      RETURN
-*
-*     End of DGELQF
-*
-      END
--- a/lib/linalg/dgelsd.cpp
+++ b/lib/linalg/dgelsd.cpp
@ -0,0 +1,341 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__6 = 6;
+static integer c_n1 = -1;
+static integer c__9 = 9;
+static integer c__0 = 0;
+static integer c__1 = 1;
+static doublereal c_b82 = 0.;
+int dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
+            integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
+            integer *lwork, integer *iwork, integer *info)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    double log(doublereal);
+    integer ie, il, mm;
+    doublereal eps, anrm, bnrm;
+    integer itau, nlvl, iascl, ibscl;
+    doublereal sfmin;
+    integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern int dlabad_(doublereal *, doublereal *),
+        dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                doublereal *, doublereal *, doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *, ftnlen),
+        dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
+    extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       integer *, integer *),
+        dlalsd_(char *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *,
+                integer *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen),
+        dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
+                doublereal *, integer *, integer *, ftnlen),
+        dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                integer *, integer *),
+        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
+                ftnlen),
+        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
+                ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    doublereal bignum;
+    extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
+                       integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+                       integer *, ftnlen, ftnlen, ftnlen);
+    integer wlalsd;
+    extern int dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
+                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
+                       ftnlen, ftnlen);
+    integer ldwork;
+    extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
+                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
+                       ftnlen, ftnlen);
+    integer liwork, minwrk, maxwrk;
+    doublereal smlnum;
+    logical lquery;
+    integer smlsiz;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    --iwork;
+    *info = 0;
+    minmn = min(*m, *n);
+    maxmn = max(*m, *n);
+    mnthr = ilaenv_(&c__6, (char *)"DGELSD", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*nrhs < 0) {
+        *info = -3;
+    } else if (*lda < max(1, *m)) {
+        *info = -5;
+    } else if (*ldb < max(1, maxmn)) {
+        *info = -7;
+    }
+    smlsiz = ilaenv_(&c__9, (char *)"DGELSD", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
+    minwrk = 1;
+    liwork = 1;
+    minmn = max(1, minmn);
+    i__1 = (integer)(log((doublereal)minmn / (doublereal)(smlsiz + 1)) / log(2.)) + 1;
+    nlvl = max(i__1, 0);
+    if (*info == 0) {
+        maxwrk = 0;
+        liwork = minmn * 3 * nlvl + minmn * 11;
+        mm = *m;
+        if (*m >= *n && *m >= mnthr) {
+            mm = *n;
+            i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1,
+                                                    (ftnlen)6, (ftnlen)1);
+            maxwrk = max(i__1, i__2);
+            i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, (char *)"DORMQR", (char *)"LT", m, nrhs, n, &c_n1,
+                                                       (ftnlen)6, (ftnlen)2);
+            maxwrk = max(i__1, i__2);
+        }
+        if (*m >= *n) {
+            i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", &mm, n, &c_n1,
+                                                               &c_n1, (ftnlen)6, (ftnlen)1);
+            maxwrk = max(i__1, i__2);
+            i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", &mm, nrhs, n,
+                                                           &c_n1, (ftnlen)6, (ftnlen)3);
+            maxwrk = max(i__1, i__2);
+            i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, n,
+                                                              &c_n1, (ftnlen)6, (ftnlen)3);
+            maxwrk = max(i__1, i__2);
+            i__1 = smlsiz + 1;
+            wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *nrhs + i__1 * i__1;
+            i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
+            maxwrk = max(i__1, i__2);
+            i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2),
+            i__2 = *n * 3 + wlalsd;
+            minwrk = max(i__1, i__2);
+        }
+        if (*n > *m) {
+            i__1 = smlsiz + 1;
+            wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *nrhs + i__1 * i__1;
+            if (*n >= mnthr) {
+                maxwrk = *m + *m * ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6,
+                                           (ftnlen)1);
+                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
+                                      (*m << 1) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, m, &c_n1, &c_n1,
+                                                          (ftnlen)6, (ftnlen)1);
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
+                                      *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, m, &c_n1,
+                                                      (ftnlen)6, (ftnlen)3);
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
+                                      (*m - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", m, nrhs, m, &c_n1,
+                                                         (ftnlen)6, (ftnlen)3);
+                maxwrk = max(i__1, i__2);
+                if (*nrhs > 1) {
+                    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+                    maxwrk = max(i__1, i__2);
+                } else {
+                    i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
+                    maxwrk = max(i__1, i__2);
+                }
+                i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, (char *)"DORMLQ", (char *)"LT", n, nrhs, m, &c_n1,
+                                                           (ftnlen)6, (ftnlen)2);
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
+                maxwrk = max(i__1, i__2);
+                i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
+                i__4 = *n - *m * 3;
+                i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3, i__4);
+                maxwrk = max(i__1, i__2);
+            } else {
+                maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1,
+                                                      (ftnlen)6, (ftnlen)1);
+                i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, n,
+                                                               &c_n1, (ftnlen)6, (ftnlen)3);
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, m,
+                                                            &c_n1, (ftnlen)6, (ftnlen)3);
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
+                maxwrk = max(i__1, i__2);
+            }
+            i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1, i__2),
+            i__2 = *m * 3 + wlalsd;
+            minwrk = max(i__1, i__2);
+        }
+        minwrk = min(minwrk, maxwrk);
+        work[1] = (doublereal)maxwrk;
+        iwork[1] = liwork;
+        if (*lwork < minwrk && !lquery) {
+            *info = -12;
+        }
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGELSD", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        goto L10;
+    }
+    if (*m == 0 || *n == 0) {
+        *rank = 0;
+        return 0;
+    }
+    eps = dlamch_((char *)"P", (ftnlen)1);
+    sfmin = dlamch_((char *)"S", (ftnlen)1);
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
+        iascl = 1;
+    } else if (anrm > bignum) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
+        iascl = 2;
+    } else if (anrm == 0.) {
+        i__1 = max(*m, *n);
+        dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb, (ftnlen)1);
+        dlaset_((char *)"F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1, (ftnlen)1);
+        *rank = 0;
+        goto L10;
+    }
+    bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        ibscl = 1;
+    } else if (bnrm > bignum) {
+        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        ibscl = 2;
+    }
+    if (*m < *n) {
+        i__1 = *n - *m;
+        dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
+    }
+    if (*m >= *n) {
+        mm = *m;
+        if (*m >= mnthr) {
+            mm = *n;
+            itau = 1;
+            nwork = itau + *n;
+            i__1 = *lwork - nwork + 1;
+            dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
+            i__1 = *lwork - nwork + 1;
+            dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
+            if (*n > 1) {
+                i__1 = *n - 1;
+                i__2 = *n - 1;
+                dlaset_((char *)"L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], lda, (ftnlen)1);
+            }
+        }
+        ie = 1;
+        itauq = ie + *n;
+        itaup = itauq + *n;
+        nwork = itaup + *n;
+        i__1 = *lwork - nwork + 1;
+        dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
+                &work[nwork], &i__1, info);
+        i__1 = *lwork - nwork + 1;
+        dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
+                &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+        dlalsd_((char *)"U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
+                &work[nwork], &iwork[1], info, (ftnlen)1);
+        if (*info != 0) {
+            goto L10;
+        }
+        i__1 = *lwork - nwork + 1;
+        dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
+                &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+    } else {
+        i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1, i__2), i__1 = max(i__1, *nrhs),
+        i__2 = *n - *m * 3, i__1 = max(i__1, i__2);
+        if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1, wlalsd)) {
+            ldwork = *m;
+            i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
+            i__4 = *n - *m * 3;
+            i__1 = (*m << 2) + *m * *lda + max(i__3, i__4), i__2 = *m * *lda + *m + *m * *nrhs,
+            i__1 = max(i__1, i__2), i__2 = (*m << 2) + *m * *lda + wlalsd;
+            if (*lwork >= max(i__1, i__2)) {
+                ldwork = *lda;
+            }
+            itau = 1;
+            nwork = *m + 1;
+            i__1 = *lwork - nwork + 1;
+            dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
+            il = nwork;
+            dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
+            i__1 = *m - 1;
+            i__2 = *m - 1;
+            dlaset_((char *)"U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &ldwork, (ftnlen)1);
+            ie = il + ldwork * *m;
+            itauq = ie + *m;
+            itaup = itauq + *m;
+            nwork = itaup + *m;
+            i__1 = *lwork - nwork + 1;
+            dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
+                    &work[nwork], &i__1, info);
+            i__1 = *lwork - nwork + 1;
+            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            dlalsd_((char *)"U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
+                    &work[nwork], &iwork[1], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L10;
+            }
+            i__1 = *lwork - nwork + 1;
+            dormbr_((char *)"P", (char *)"L", (char *)"N", m, nrhs, m, &work[il], &ldwork, &work[itaup], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            i__1 = *n - *m;
+            dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
+            nwork = itau + *m;
+            i__1 = *lwork - nwork + 1;
+            dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
+        } else {
+            ie = 1;
+            itauq = ie + *m;
+            itaup = itauq + *m;
+            nwork = itaup + *m;
+            i__1 = *lwork - nwork + 1;
+            dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
+                    &work[nwork], &i__1, info);
+            i__1 = *lwork - nwork + 1;
+            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            dlalsd_((char *)"L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
+                    &work[nwork], &iwork[1], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L10;
+            }
+            i__1 = *lwork - nwork + 1;
+            dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, m, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
+                    &work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+        }
+    }
+    if (iascl == 1) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
+    } else if (iascl == 2) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
+    }
+    if (ibscl == 1) {
+        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+    } else if (ibscl == 2) {
+        dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+    }
+L10:
+    work[1] = (doublereal)maxwrk;
+    iwork[1] = liwork;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgelsd.f
+++ b/lib/linalg/dgelsd.f
@ -1,626 +0,0 @@
-*> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGELSD + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-*                          WORK, LWORK, IWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-*       DOUBLE PRECISION   RCOND
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IWORK( * )
-*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGELSD computes the minimum-norm solution to a real linear least
-*> squares problem:
-*>     minimize 2-norm(| b - A*x |)
-*> using the singular value decomposition (SVD) of A. A is an M-by-N
-*> matrix which may be rank-deficient.
-*>
-*> Several right hand side vectors b and solution vectors x can be
-*> handled in a single call; they are stored as the columns of the
-*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
-*> matrix X.
-*>
-*> The problem is solved in three steps:
-*> (1) Reduce the coefficient matrix A to bidiagonal form with
-*>     Householder transformations, reducing the original problem
-*>     into a "bidiagonal least squares problem" (BLS)
-*> (2) Solve the BLS using a divide and conquer approach.
-*> (3) Apply back all the Householder transformations to solve
-*>     the original least squares problem.
-*>
-*> The effective rank of A is determined by treating as zero those
-*> singular values which are less than RCOND times the largest singular
-*> value.
-*>
-*> The divide and conquer algorithm makes very mild assumptions about
-*> floating point arithmetic. It will work on machines with a guard
-*> digit in add/subtract, or on those binary machines without guard
-*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
-*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
-*> without guard digits, but we know of none.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of A. M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of A. N >= 0.
-*> \endverbatim
-*>
-*> \param[in] NRHS
-*> \verbatim
-*>          NRHS is INTEGER
-*>          The number of right hand sides, i.e., the number of columns
-*>          of the matrices B and X. NRHS >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix A.
-*>          On exit, A has been destroyed.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
-*>          On entry, the M-by-NRHS right hand side matrix B.
-*>          On exit, B is overwritten by the N-by-NRHS solution
-*>          matrix X.  If m >= n and RANK = n, the residual
-*>          sum-of-squares for the solution in the i-th column is given
-*>          by the sum of squares of elements n+1:m in that column.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
-*> \endverbatim
-*>
-*> \param[out] S
-*> \verbatim
-*>          S is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The singular values of A in decreasing order.
-*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
-*> \endverbatim
-*>
-*> \param[in] RCOND
-*> \verbatim
-*>          RCOND is DOUBLE PRECISION
-*>          RCOND is used to determine the effective rank of A.
-*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
-*>          If RCOND < 0, machine precision is used instead.
-*> \endverbatim
-*>
-*> \param[out] RANK
-*> \verbatim
-*>          RANK is INTEGER
-*>          The effective rank of A, i.e., the number of singular values
-*>          which are greater than RCOND*S(1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The dimension of the array WORK. LWORK must be at least 1.
-*>          The exact minimum amount of workspace needed depends on M,
-*>          N and NRHS. As long as LWORK is at least
-*>              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
-*>          if M is greater than or equal to N or
-*>              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
-*>          if M is less than N, the code will execute correctly.
-*>          SMLSIZ is returned by ILAENV and is equal to the maximum
-*>          size of the subproblems at the bottom of the computation
-*>          tree (usually about 25), and
-*>             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
-*>          For good performance, LWORK should generally be larger.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] IWORK
-*> \verbatim
-*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
-*>          LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
-*>          where MINMN = MIN( M,N ).
-*>          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*>          > 0:  the algorithm for computing the SVD failed to converge;
-*>                if INFO = i, i off-diagonal elements of an intermediate
-*>                bidiagonal form did not converge to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEsolve
-*
-*> \par Contributors:
-*  ==================
-*>
-*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
-*>       California at Berkeley, USA \n
-*>     Osni Marques, LBNL/NERSC, USA \n
-*
-*  =====================================================================
-      SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-     $                   WORK, LWORK, IWORK, INFO )
-*
-*  -- LAPACK driver routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-      DOUBLE PRECISION   RCOND
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IWORK( * )
-      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE, TWO
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
-     $                   LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
-     $                   MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
-      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
-     $                   DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      DOUBLE PRECISION   DLAMCH, DLANGE
-      EXTERNAL           ILAENV, DLAMCH, DLANGE
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          DBLE, INT, LOG, MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments.
-*
-      INFO = 0
-      MINMN = MIN( M, N )
-      MAXMN = MAX( M, N )
-      MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( NRHS.LT.0 ) THEN
-         INFO = -3
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -5
-      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
-         INFO = -7
-      END IF
-*
-      SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
-*
-*     Compute workspace.
-*     (Note: Comments in the code beginning "Workspace:" describe the
-*     minimal amount of workspace needed at that point in the code,
-*     as well as the preferred amount for good performance.
-*     NB refers to the optimal block size for the immediately
-*     following subroutine, as returned by ILAENV.)
-*
-      MINWRK = 1
-      LIWORK = 1
-      MINMN = MAX( 1, MINMN )
-      NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
-     $       LOG( TWO ) ) + 1, 0 )
-*
-      IF( INFO.EQ.0 ) THEN
-         MAXWRK = 0
-         LIWORK = 3*MINMN*NLVL + 11*MINMN
-         MM = M
-         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
-*
-*           Path 1a - overdetermined, with many more rows than columns.
-*
-            MM = N
-            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
-     $               -1, -1 ) )
-            MAXWRK = MAX( MAXWRK, N+NRHS*
-     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
-         END IF
-         IF( M.GE.N ) THEN
-*
-*           Path 1 - overdetermined or exactly determined.
-*
-            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
-     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
-            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
-     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
-            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
-     $               ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
-            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
-            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
-            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
-         END IF
-         IF( N.GT.M ) THEN
-            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
-            IF( N.GE.MNTHR ) THEN
-*
-*              Path 2a - underdetermined, with many more columns
-*              than rows.
-*
-               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
-               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
-     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
-               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
-     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
-               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
-     $                  ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
-               IF( NRHS.GT.1 ) THEN
-                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
-               ELSE
-                  MAXWRK = MAX( MAXWRK, M*M+2*M )
-               END IF
-               MAXWRK = MAX( MAXWRK, M+NRHS*
-     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
-               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
-!     XXX: Ensure the Path 2a case below is triggered.  The workspace
-!     calculation should use queries for all routines eventually.
-               MAXWRK = MAX( MAXWRK,
-     $              4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
-            ELSE
-*
-*              Path 2 - remaining underdetermined cases.
-*
-               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
-     $                  -1, -1 )
-               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
-     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
-               MAXWRK = MAX( MAXWRK, 3*M+M*
-     $                  ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
-               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
-            END IF
-            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
-         END IF
-         MINWRK = MIN( MINWRK, MAXWRK )
-         WORK( 1 ) = MAXWRK
-         IWORK( 1 ) = LIWORK
-
-         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
-            INFO = -12
-         END IF
-      END IF
-*
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGELSD', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         GO TO 10
-      END IF
-*
-*     Quick return if possible.
-*
-      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
-         RANK = 0
-         RETURN
-      END IF
-*
-*     Get machine parameters.
-*
-      EPS = DLAMCH( 'P' )
-      SFMIN = DLAMCH( 'S' )
-      SMLNUM = SFMIN / EPS
-      BIGNUM = ONE / SMLNUM
-      CALL DLABAD( SMLNUM, BIGNUM )
-*
-*     Scale A if max entry outside range [SMLNUM,BIGNUM].
-*
-      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
-      IASCL = 0
-      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
-*
-*        Scale matrix norm up to SMLNUM.
-*
-         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
-         IASCL = 1
-      ELSE IF( ANRM.GT.BIGNUM ) THEN
-*
-*        Scale matrix norm down to BIGNUM.
-*
-         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
-         IASCL = 2
-      ELSE IF( ANRM.EQ.ZERO ) THEN
-*
-*        Matrix all zero. Return zero solution.
-*
-         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
-         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
-         RANK = 0
-         GO TO 10
-      END IF
-*
-*     Scale B if max entry outside range [SMLNUM,BIGNUM].
-*
-      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
-      IBSCL = 0
-      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
-*
-*        Scale matrix norm up to SMLNUM.
-*
-         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
-         IBSCL = 1
-      ELSE IF( BNRM.GT.BIGNUM ) THEN
-*
-*        Scale matrix norm down to BIGNUM.
-*
-         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
-         IBSCL = 2
-      END IF
-*
-*     If M < N make sure certain entries of B are zero.
-*
-      IF( M.LT.N )
-     $   CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
-*
-*     Overdetermined case.
-*
-      IF( M.GE.N ) THEN
-*
-*        Path 1 - overdetermined or exactly determined.
-*
-         MM = M
-         IF( M.GE.MNTHR ) THEN
-*
-*           Path 1a - overdetermined, with many more rows than columns.
-*
-            MM = N
-            ITAU = 1
-            NWORK = ITAU + N
-*
-*           Compute A=Q*R.
-*           (Workspace: need 2*N, prefer N+N*NB)
-*
-            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                   LWORK-NWORK+1, INFO )
-*
-*           Multiply B by transpose(Q).
-*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
-*
-            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
-     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-*           Zero out below R.
-*
-            IF( N.GT.1 ) THEN
-               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
-            END IF
-         END IF
-*
-         IE = 1
-         ITAUQ = IE + N
-         ITAUP = ITAUQ + N
-         NWORK = ITAUP + N
-*
-*        Bidiagonalize R in A.
-*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
-*
-         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
-     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
-     $                INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors of R.
-*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
-     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-*        Solve the bidiagonal least squares problem.
-*
-         CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
-     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
-         IF( INFO.NE.0 ) THEN
-            GO TO 10
-         END IF
-*
-*        Multiply B by right bidiagonalizing vectors of R.
-*
-         CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
-     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
-     $         MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
-*
-*        Path 2a - underdetermined, with many more columns than rows
-*        and sufficient workspace for an efficient algorithm.
-*
-         LDWORK = M
-         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
-     $       M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
-         ITAU = 1
-         NWORK = M + 1
-*
-*        Compute A=L*Q.
-*        (Workspace: need 2*M, prefer M+M*NB)
-*
-         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                LWORK-NWORK+1, INFO )
-         IL = NWORK
-*
-*        Copy L to WORK(IL), zeroing out above its diagonal.
-*
-         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
-         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
-     $                LDWORK )
-         IE = IL + LDWORK*M
-         ITAUQ = IE + M
-         ITAUP = ITAUQ + M
-         NWORK = ITAUP + M
-*
-*        Bidiagonalize L in WORK(IL).
-*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
-*
-         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
-     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
-     $                LWORK-NWORK+1, INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors of L.
-*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
-     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
-     $                LWORK-NWORK+1, INFO )
-*
-*        Solve the bidiagonal least squares problem.
-*
-         CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
-     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
-         IF( INFO.NE.0 ) THEN
-            GO TO 10
-         END IF
-*
-*        Multiply B by right bidiagonalizing vectors of L.
-*
-         CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
-     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
-     $                LWORK-NWORK+1, INFO )
-*
-*        Zero out below first M rows of B.
-*
-         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
-         NWORK = ITAU + M
-*
-*        Multiply transpose(Q) by B.
-*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
-*
-         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
-     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-      ELSE
-*
-*        Path 2 - remaining underdetermined cases.
-*
-         IE = 1
-         ITAUQ = IE + M
-         ITAUP = ITAUQ + M
-         NWORK = ITAUP + M
-*
-*        Bidiagonalize A.
-*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
-*
-         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
-     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
-     $                INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors.
-*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
-     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-*        Solve the bidiagonal least squares problem.
-*
-         CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
-     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
-         IF( INFO.NE.0 ) THEN
-            GO TO 10
-         END IF
-*
-*        Multiply B by right bidiagonalizing vectors of A.
-*
-         CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
-     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
-*
-      END IF
-*
-*     Undo scaling.
-*
-      IF( IASCL.EQ.1 ) THEN
-         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
-         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
-     $                INFO )
-      ELSE IF( IASCL.EQ.2 ) THEN
-         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
-         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
-     $                INFO )
-      END IF
-      IF( IBSCL.EQ.1 ) THEN
-         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
-      ELSE IF( IBSCL.EQ.2 ) THEN
-         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
-      END IF
-*
-   10 CONTINUE
-      WORK( 1 ) = MAXWRK
-      IWORK( 1 ) = LIWORK
-      RETURN
-*
-*     End of DGELSD
-*
-      END
--- a/lib/linalg/dgelss.cpp
+++ b/lib/linalg/dgelss.cpp
@ -0,0 +1,466 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__6 = 6;
+static integer c_n1 = -1;
+static integer c__0 = 0;
+static doublereal c_b46 = 0.;
+static integer c__1 = 1;
+static doublereal c_b79 = 1.;
+int dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
+            integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
+            integer *lwork, integer *info)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1;
+    integer i__, bl, ie, il, mm;
+    doublereal dum[1], eps, thr, anrm, bnrm;
+    integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, lwork_dorgbr__, lwork_dormbr__,
+        lwork_dormlq__, lwork_dormqr__;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen);
+    integer iascl, ibscl;
+    extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *,
+                      doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen),
+        drscl_(integer *, doublereal *, doublereal *, integer *);
+    integer chunk;
+    doublereal sfmin;
+    integer minmn;
+    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
+    integer maxmn, itaup, itauq, mnthr, iwork;
+    extern int dlabad_(doublereal *, doublereal *),
+        dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                doublereal *, doublereal *, doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *, ftnlen),
+        dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
+    integer bdspac;
+    extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       integer *, integer *),
+        dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
+                doublereal *, integer *, integer *, ftnlen),
+        dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                integer *, integer *),
+        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
+                ftnlen),
+        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
+                ftnlen),
+        xerbla_(char *, integer *, ftnlen),
+        dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *,
+                doublereal *, integer *, doublereal *, integer *, doublereal *, integer *,
+                doublereal *, integer *, ftnlen),
+        dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *,
+                doublereal *, integer *, integer *, ftnlen);
+    doublereal bignum;
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
+                       integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+                       integer *, ftnlen, ftnlen, ftnlen),
+        dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
+                doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen,
+                ftnlen);
+    integer ldwork;
+    extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
+                       doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
+                       ftnlen, ftnlen);
+    integer minwrk, maxwrk;
+    doublereal smlnum;
+    logical lquery;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    *info = 0;
+    minmn = min(*m, *n);
+    maxmn = max(*m, *n);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*nrhs < 0) {
+        *info = -3;
+    } else if (*lda < max(1, *m)) {
+        *info = -5;
+    } else if (*ldb < max(1, maxmn)) {
+        *info = -7;
+    }
+    if (*info == 0) {
+        minwrk = 1;
+        maxwrk = 1;
+        if (minmn > 0) {
+            mm = *m;
+            mnthr = ilaenv_(&c__6, (char *)"DGELSS", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
+            if (*m >= *n && *m >= mnthr) {
+                dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
+                lwork_dgeqrf__ = (integer)dum[0];
+                dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum, &c_n1,
+                        info, (ftnlen)1, (ftnlen)1);
+                lwork_dormqr__ = (integer)dum[0];
+                mm = *n;
+                i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__;
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *n + lwork_dormqr__;
+                maxwrk = max(i__1, i__2);
+            }
+            if (*m >= *n) {
+                i__1 = 1, i__2 = *n * 5;
+                bdspac = max(i__1, i__2);
+                dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
+                lwork_dgebrd__ = (integer)dum[0];
+                dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
+                        &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+                lwork_dormbr__ = (integer)dum[0];
+                dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
+                lwork_dorgbr__ = (integer)dum[0];
+                i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__;
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__;
+                maxwrk = max(i__1, i__2);
+                i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__;
+                maxwrk = max(i__1, i__2);
+                maxwrk = max(maxwrk, bdspac);
+                i__1 = maxwrk, i__2 = *n * *nrhs;
+                maxwrk = max(i__1, i__2);
+                i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2);
+                minwrk = max(i__1, bdspac);
+                maxwrk = max(minwrk, maxwrk);
+            }
+            if (*n > *m) {
+                i__1 = 1, i__2 = *m * 5;
+                bdspac = max(i__1, i__2);
+                i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, i__2);
+                minwrk = max(i__1, bdspac);
+                if (*n >= mnthr) {
+                    dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
+                    lwork_dgelqf__ = (integer)dum[0];
+                    dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
+                    lwork_dgebrd__ = (integer)dum[0];
+                    dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb,
+                            dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+                    lwork_dormbr__ = (integer)dum[0];
+                    dorgbr_((char *)"P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
+                    lwork_dorgbr__ = (integer)dum[0];
+                    dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
+                            &c_n1, info, (ftnlen)1, (ftnlen)1);
+                    lwork_dormlq__ = (integer)dum[0];
+                    maxwrk = *m + lwork_dgelqf__;
+                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dgebrd__;
+                    maxwrk = max(i__1, i__2);
+                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dormbr__;
+                    maxwrk = max(i__1, i__2);
+                    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dorgbr__;
+                    maxwrk = max(i__1, i__2);
+                    i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
+                    maxwrk = max(i__1, i__2);
+                    if (*nrhs > 1) {
+                        i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+                        maxwrk = max(i__1, i__2);
+                    } else {
+                        i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
+                        maxwrk = max(i__1, i__2);
+                    }
+                    i__1 = maxwrk, i__2 = *m + lwork_dormlq__;
+                    maxwrk = max(i__1, i__2);
+                } else {
+                    dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
+                    lwork_dgebrd__ = (integer)dum[0];
+                    dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb,
+                            dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+                    lwork_dormbr__ = (integer)dum[0];
+                    dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
+                    lwork_dorgbr__ = (integer)dum[0];
+                    maxwrk = *m * 3 + lwork_dgebrd__;
+                    i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__;
+                    maxwrk = max(i__1, i__2);
+                    i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__;
+                    maxwrk = max(i__1, i__2);
+                    maxwrk = max(maxwrk, bdspac);
+                    i__1 = maxwrk, i__2 = *n * *nrhs;
+                    maxwrk = max(i__1, i__2);
+                }
+            }
+            maxwrk = max(minwrk, maxwrk);
+        }
+        work[1] = (doublereal)maxwrk;
+        if (*lwork < minwrk && !lquery) {
+            *info = -12;
+        }
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGELSS", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        return 0;
+    }
+    if (*m == 0 || *n == 0) {
+        *rank = 0;
+        return 0;
+    }
+    eps = dlamch_((char *)"P", (ftnlen)1);
+    sfmin = dlamch_((char *)"S", (ftnlen)1);
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
+        iascl = 1;
+    } else if (anrm > bignum) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
+        iascl = 2;
+    } else if (anrm == 0.) {
+        i__1 = max(*m, *n);
+        dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb, (ftnlen)1);
+        dlaset_((char *)"F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn, (ftnlen)1);
+        *rank = 0;
+        goto L70;
+    }
+    bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        ibscl = 1;
+    } else if (bnrm > bignum) {
+        dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        ibscl = 2;
+    }
+    if (*m >= *n) {
+        mm = *m;
+        if (*m >= mnthr) {
+            mm = *n;
+            itau = 1;
+            iwork = itau + *n;
+            i__1 = *lwork - iwork + 1;
+            dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info);
+            i__1 = *lwork - iwork + 1;
+            dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
+                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
+            if (*n > 1) {
+                i__1 = *n - 1;
+                i__2 = *n - 1;
+                dlaset_((char *)"L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], lda, (ftnlen)1);
+            }
+        }
+        ie = 1;
+        itauq = ie + *n;
+        itaup = itauq + *n;
+        iwork = itaup + *n;
+        i__1 = *lwork - iwork + 1;
+        dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
+                &work[iwork], &i__1, info);
+        i__1 = *lwork - iwork + 1;
+        dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
+                &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+        i__1 = *lwork - iwork + 1;
+        dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
+                (ftnlen)1);
+        iwork = ie + *n;
+        dbdsqr_((char *)"U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
+                &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
+        if (*info != 0) {
+            goto L70;
+        }
+        d__1 = *rcond * s[1];
+        thr = max(d__1, sfmin);
+        if (*rcond < 0.) {
+            d__1 = eps * s[1];
+            thr = max(d__1, sfmin);
+        }
+        *rank = 0;
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            if (s[i__] > thr) {
+                drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+                ++(*rank);
+            } else {
+                dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
+            }
+        }
+        if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+            dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
+                   &work[1], ldb, (ftnlen)1, (ftnlen)1);
+            dlacpy_((char *)"G", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
+        } else if (*nrhs > 1) {
+            chunk = *lwork / *n;
+            i__1 = *nrhs;
+            i__2 = chunk;
+            for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+                i__3 = *nrhs - i__ + 1;
+                bl = min(i__3, chunk);
+                dgemm_((char *)"T", (char *)"N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1], ldb,
+                       &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
+                dlacpy_((char *)"G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
+            }
+        } else {
+            dgemv_((char *)"T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
+                   &c__1, (ftnlen)1);
+            dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+        }
+    } else {
+        i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2, i__1), i__2 = max(i__2, *nrhs),
+        i__1 = *n - *m * 3;
+        if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2, i__1)) {
+            ldwork = *m;
+            i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
+            i__4 = *n - *m * 3;
+            i__2 = (*m << 2) + *m * *lda + max(i__3, i__4), i__1 = *m * *lda + *m + *m * *nrhs;
+            if (*lwork >= max(i__2, i__1)) {
+                ldwork = *lda;
+            }
+            itau = 1;
+            iwork = *m + 1;
+            i__2 = *lwork - iwork + 1;
+            dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info);
+            il = iwork;
+            dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
+            i__2 = *m - 1;
+            i__1 = *m - 1;
+            dlaset_((char *)"U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], &ldwork, (ftnlen)1);
+            ie = il + ldwork * *m;
+            itauq = ie + *m;
+            itaup = itauq + *m;
+            iwork = itaup + *m;
+            i__2 = *lwork - iwork + 1;
+            dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
+                    &work[iwork], &i__2, info);
+            i__2 = *lwork - iwork + 1;
+            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
+                    &work[iwork], &i__2, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            i__2 = *lwork - iwork + 1;
+            dorgbr_((char *)"P", m, m, m, &work[il], &ldwork, &work[itaup], &work[iwork], &i__2, info,
+                    (ftnlen)1);
+            iwork = ie + *m;
+            dbdsqr_((char *)"U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &ldwork, &a[a_offset], lda,
+                    &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L70;
+            }
+            d__1 = *rcond * s[1];
+            thr = max(d__1, sfmin);
+            if (*rcond < 0.) {
+                d__1 = eps * s[1];
+                thr = max(d__1, sfmin);
+            }
+            *rank = 0;
+            i__2 = *m;
+            for (i__ = 1; i__ <= i__2; ++i__) {
+                if (s[i__] > thr) {
+                    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+                    ++(*rank);
+                } else {
+                    dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
+                }
+            }
+            iwork = ie;
+            if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
+                dgemm_((char *)"T", (char *)"N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[b_offset], ldb, &c_b46,
+                       &work[iwork], ldb, (ftnlen)1, (ftnlen)1);
+                dlacpy_((char *)"G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb, (ftnlen)1);
+            } else if (*nrhs > 1) {
+                chunk = (*lwork - iwork + 1) / *m;
+                i__2 = *nrhs;
+                i__1 = chunk;
+                for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+                    i__3 = *nrhs - i__ + 1;
+                    bl = min(i__3, chunk);
+                    dgemm_((char *)"T", (char *)"N", m, &bl, m, &c_b79, &work[il], &ldwork, &b[i__ * b_dim1 + 1],
+                           ldb, &c_b46, &work[iwork], m, (ftnlen)1, (ftnlen)1);
+                    dlacpy_((char *)"G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
+                }
+            } else {
+                dgemv_((char *)"T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], &c__1, &c_b46,
+                       &work[iwork], &c__1, (ftnlen)1);
+                dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
+            }
+            i__1 = *n - *m;
+            dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
+            iwork = itau + *m;
+            i__1 = *lwork - iwork + 1;
+            dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
+                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
+        } else {
+            ie = 1;
+            itauq = ie + *m;
+            itaup = itauq + *m;
+            iwork = itaup + *m;
+            i__1 = *lwork - iwork + 1;
+            dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
+                    &work[iwork], &i__1, info);
+            i__1 = *lwork - iwork + 1;
+            dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
+                    &work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+            i__1 = *lwork - iwork + 1;
+            dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
+                    (ftnlen)1);
+            iwork = ie + *m;
+            dbdsqr_((char *)"L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
+                    &b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L70;
+            }
+            d__1 = *rcond * s[1];
+            thr = max(d__1, sfmin);
+            if (*rcond < 0.) {
+                d__1 = eps * s[1];
+                thr = max(d__1, sfmin);
+            }
+            *rank = 0;
+            i__1 = *m;
+            for (i__ = 1; i__ <= i__1; ++i__) {
+                if (s[i__] > thr) {
+                    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+                    ++(*rank);
+                } else {
+                    dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
+                }
+            }
+            if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+                dgemm_((char *)"T", (char *)"N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
+                       &work[1], ldb, (ftnlen)1, (ftnlen)1);
+                dlacpy_((char *)"F", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
+            } else if (*nrhs > 1) {
+                chunk = *lwork / *n;
+                i__1 = *nrhs;
+                i__2 = chunk;
+                for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+                    i__3 = *nrhs - i__ + 1;
+                    bl = min(i__3, chunk);
+                    dgemm_((char *)"T", (char *)"N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1],
+                           ldb, &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
+                    dlacpy_((char *)"F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
+                }
+            } else {
+                dgemv_((char *)"T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
+                       &c__1, (ftnlen)1);
+                dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+            }
+        }
+    }
+    if (iascl == 1) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
+    } else if (iascl == 2) {
+        dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+        dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
+    }
+    if (ibscl == 1) {
+        dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+    } else if (ibscl == 2) {
+        dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
+    }
+L70:
+    work[1] = (doublereal)maxwrk;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgelss.f
+++ b/lib/linalg/dgelss.f
@ -1,744 +0,0 @@
-*> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGELSS + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-*                          WORK, LWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-*       DOUBLE PRECISION   RCOND
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGELSS computes the minimum norm solution to a real linear least
-*> squares problem:
-*>
-*> Minimize 2-norm(| b - A*x |).
-*>
-*> using the singular value decomposition (SVD) of A. A is an M-by-N
-*> matrix which may be rank-deficient.
-*>
-*> Several right hand side vectors b and solution vectors x can be
-*> handled in a single call; they are stored as the columns of the
-*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
-*> X.
-*>
-*> The effective rank of A is determined by treating as zero those
-*> singular values which are less than RCOND times the largest singular
-*> value.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A. M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A. N >= 0.
-*> \endverbatim
-*>
-*> \param[in] NRHS
-*> \verbatim
-*>          NRHS is INTEGER
-*>          The number of right hand sides, i.e., the number of columns
-*>          of the matrices B and X. NRHS >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix A.
-*>          On exit, the first min(m,n) rows of A are overwritten with
-*>          its right singular vectors, stored rowwise.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
-*>          On entry, the M-by-NRHS right hand side matrix B.
-*>          On exit, B is overwritten by the N-by-NRHS solution
-*>          matrix X.  If m >= n and RANK = n, the residual
-*>          sum-of-squares for the solution in the i-th column is given
-*>          by the sum of squares of elements n+1:m in that column.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
-*> \endverbatim
-*>
-*> \param[out] S
-*> \verbatim
-*>          S is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The singular values of A in decreasing order.
-*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
-*> \endverbatim
-*>
-*> \param[in] RCOND
-*> \verbatim
-*>          RCOND is DOUBLE PRECISION
-*>          RCOND is used to determine the effective rank of A.
-*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
-*>          If RCOND < 0, machine precision is used instead.
-*> \endverbatim
-*>
-*> \param[out] RANK
-*> \verbatim
-*>          RANK is INTEGER
-*>          The effective rank of A, i.e., the number of singular values
-*>          which are greater than RCOND*S(1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The dimension of the array WORK. LWORK >= 1, and also:
-*>          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
-*>          For good performance, LWORK should generally be larger.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*>          > 0:  the algorithm for computing the SVD failed to converge;
-*>                if INFO = i, i off-diagonal elements of an intermediate
-*>                bidiagonal form did not converge to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEsolve
-*
-*  =====================================================================
-      SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-     $                   WORK, LWORK, INFO )
-*
-*  -- LAPACK driver routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-      DOUBLE PRECISION   RCOND
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
-     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
-     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
-      INTEGER            LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
-     $                   LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
-     $                   LWORK_DGELQF
-      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
-*     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   DUM( 1 )
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
-     $                   DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
-     $                   DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      DOUBLE PRECISION   DLAMCH, DLANGE
-      EXTERNAL           ILAENV, DLAMCH, DLANGE
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments
-*
-      INFO = 0
-      MINMN = MIN( M, N )
-      MAXMN = MAX( M, N )
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( NRHS.LT.0 ) THEN
-         INFO = -3
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -5
-      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
-         INFO = -7
-      END IF
-*
-*     Compute workspace
-*      (Note: Comments in the code beginning "Workspace:" describe the
-*       minimal amount of workspace needed at that point in the code,
-*       as well as the preferred amount for good performance.
-*       NB refers to the optimal block size for the immediately
-*       following subroutine, as returned by ILAENV.)
-*
-      IF( INFO.EQ.0 ) THEN
-         MINWRK = 1
-         MAXWRK = 1
-         IF( MINMN.GT.0 ) THEN
-            MM = M
-            MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
-            IF( M.GE.N .AND. M.GE.MNTHR ) THEN
-*
-*              Path 1a - overdetermined, with many more rows than
-*                        columns
-*
-*              Compute space needed for DGEQRF
-               CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
-               LWORK_DGEQRF = INT( DUM(1) )
-*              Compute space needed for DORMQR
-               CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
-     $                   LDB, DUM(1), -1, INFO )
-               LWORK_DORMQR = INT( DUM(1) )
-               MM = N
-               MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
-               MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
-            END IF
-            IF( M.GE.N ) THEN
-*
-*              Path 1 - overdetermined or exactly determined
-*
-*              Compute workspace needed for DBDSQR
-*
-               BDSPAC = MAX( 1, 5*N )
-*              Compute space needed for DGEBRD
-               CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
-     $                      DUM(1), DUM(1), -1, INFO )
-               LWORK_DGEBRD = INT( DUM(1) )
-*              Compute space needed for DORMBR
-               CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
-     $                B, LDB, DUM(1), -1, INFO )
-               LWORK_DORMBR = INT( DUM(1) )
-*              Compute space needed for DORGBR
-               CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
-     $                   DUM(1), -1, INFO )
-               LWORK_DORGBR = INT( DUM(1) )
-*              Compute total workspace needed
-               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
-               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
-               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
-               MAXWRK = MAX( MAXWRK, BDSPAC )
-               MAXWRK = MAX( MAXWRK, N*NRHS )
-               MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
-               MAXWRK = MAX( MINWRK, MAXWRK )
-            END IF
-            IF( N.GT.M ) THEN
-*
-*              Compute workspace needed for DBDSQR
-*
-               BDSPAC = MAX( 1, 5*M )
-               MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
-               IF( N.GE.MNTHR ) THEN
-*
-*                 Path 2a - underdetermined, with many more columns
-*                 than rows
-*
-*                 Compute space needed for DGELQF
-                  CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
-     $                -1, INFO )
-                  LWORK_DGELQF = INT( DUM(1) )
-*                 Compute space needed for DGEBRD
-                  CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
-     $                      DUM(1), DUM(1), -1, INFO )
-                  LWORK_DGEBRD = INT( DUM(1) )
-*                 Compute space needed for DORMBR
-                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
-     $                DUM(1), B, LDB, DUM(1), -1, INFO )
-                  LWORK_DORMBR = INT( DUM(1) )
-*                 Compute space needed for DORGBR
-                  CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
-     $                   DUM(1), -1, INFO )
-                  LWORK_DORGBR = INT( DUM(1) )
-*                 Compute space needed for DORMLQ
-                  CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
-     $                 B, LDB, DUM(1), -1, INFO )
-                  LWORK_DORMLQ = INT( DUM(1) )
-*                 Compute total workspace needed
-                  MAXWRK = M + LWORK_DGELQF
-                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
-                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
-                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
-                  MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
-                  IF( NRHS.GT.1 ) THEN
-                     MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
-                  ELSE
-                     MAXWRK = MAX( MAXWRK, M*M + 2*M )
-                  END IF
-                  MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
-               ELSE
-*
-*                 Path 2 - underdetermined
-*
-*                 Compute space needed for DGEBRD
-                  CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
-     $                      DUM(1), DUM(1), -1, INFO )
-                  LWORK_DGEBRD = INT( DUM(1) )
-*                 Compute space needed for DORMBR
-                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
-     $                DUM(1), B, LDB, DUM(1), -1, INFO )
-                  LWORK_DORMBR = INT( DUM(1) )
-*                 Compute space needed for DORGBR
-                  CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
-     $                   DUM(1), -1, INFO )
-                  LWORK_DORGBR = INT( DUM(1) )
-                  MAXWRK = 3*M + LWORK_DGEBRD
-                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
-                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
-                  MAXWRK = MAX( MAXWRK, BDSPAC )
-                  MAXWRK = MAX( MAXWRK, N*NRHS )
-               END IF
-            END IF
-            MAXWRK = MAX( MINWRK, MAXWRK )
-         END IF
-         WORK( 1 ) = MAXWRK
-*
-         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
-     $      INFO = -12
-      END IF
-*
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGELSS', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
-         RANK = 0
-         RETURN
-      END IF
-*
-*     Get machine parameters
-*
-      EPS = DLAMCH( 'P' )
-      SFMIN = DLAMCH( 'S' )
-      SMLNUM = SFMIN / EPS
-      BIGNUM = ONE / SMLNUM
-      CALL DLABAD( SMLNUM, BIGNUM )
-*
-*     Scale A if max element outside range [SMLNUM,BIGNUM]
-*
-      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
-      IASCL = 0
-      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
-*
-*        Scale matrix norm up to SMLNUM
-*
-         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
-         IASCL = 1
-      ELSE IF( ANRM.GT.BIGNUM ) THEN
-*
-*        Scale matrix norm down to BIGNUM
-*
-         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
-         IASCL = 2
-      ELSE IF( ANRM.EQ.ZERO ) THEN
-*
-*        Matrix all zero. Return zero solution.
-*
-         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
-         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
-         RANK = 0
-         GO TO 70
-      END IF
-*
-*     Scale B if max element outside range [SMLNUM,BIGNUM]
-*
-      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
-      IBSCL = 0
-      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
-*
-*        Scale matrix norm up to SMLNUM
-*
-         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
-         IBSCL = 1
-      ELSE IF( BNRM.GT.BIGNUM ) THEN
-*
-*        Scale matrix norm down to BIGNUM
-*
-         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
-         IBSCL = 2
-      END IF
-*
-*     Overdetermined case
-*
-      IF( M.GE.N ) THEN
-*
-*        Path 1 - overdetermined or exactly determined
-*
-         MM = M
-         IF( M.GE.MNTHR ) THEN
-*
-*           Path 1a - overdetermined, with many more rows than columns
-*
-            MM = N
-            ITAU = 1
-            IWORK = ITAU + N
-*
-*           Compute A=Q*R
-*           (Workspace: need 2*N, prefer N+N*NB)
-*
-            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
-     $                   LWORK-IWORK+1, INFO )
-*
-*           Multiply B by transpose(Q)
-*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
-*
-            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
-     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
-*
-*           Zero out below R
-*
-            IF( N.GT.1 )
-     $         CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
-         END IF
-*
-         IE = 1
-         ITAUQ = IE + N
-         ITAUP = ITAUQ + N
-         IWORK = ITAUP + N
-*
-*        Bidiagonalize R in A
-*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
-*
-         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
-     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
-     $                INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors of R
-*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
-     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
-*
-*        Generate right bidiagonalizing vectors of R in A
-*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
-*
-         CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
-     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
-         IWORK = IE + N
-*
-*        Perform bidiagonal QR iteration
-*          multiply B by transpose of left singular vectors
-*          compute right singular vectors in A
-*        (Workspace: need BDSPAC)
-*
-         CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
-     $                1, B, LDB, WORK( IWORK ), INFO )
-         IF( INFO.NE.0 )
-     $      GO TO 70
-*
-*        Multiply B by reciprocals of singular values
-*
-         THR = MAX( RCOND*S( 1 ), SFMIN )
-         IF( RCOND.LT.ZERO )
-     $      THR = MAX( EPS*S( 1 ), SFMIN )
-         RANK = 0
-         DO 10 I = 1, N
-            IF( S( I ).GT.THR ) THEN
-               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
-               RANK = RANK + 1
-            ELSE
-               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
-            END IF
-   10    CONTINUE
-*
-*        Multiply B by right singular vectors
-*        (Workspace: need N, prefer N*NRHS)
-*
-         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
-            CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
-     $                  WORK, LDB )
-            CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
-         ELSE IF( NRHS.GT.1 ) THEN
-            CHUNK = LWORK / N
-            DO 20 I = 1, NRHS, CHUNK
-               BL = MIN( NRHS-I+1, CHUNK )
-               CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
-     $                     LDB, ZERO, WORK, N )
-               CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
-   20       CONTINUE
-         ELSE
-            CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
-            CALL DCOPY( N, WORK, 1, B, 1 )
-         END IF
-*
-      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
-     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
-*
-*        Path 2a - underdetermined, with many more columns than rows
-*        and sufficient workspace for an efficient algorithm
-*
-         LDWORK = M
-         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
-     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
-         ITAU = 1
-         IWORK = M + 1
-*
-*        Compute A=L*Q
-*        (Workspace: need 2*M, prefer M+M*NB)
-*
-         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
-     $                LWORK-IWORK+1, INFO )
-         IL = IWORK
-*
-*        Copy L to WORK(IL), zeroing out above it
-*
-         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
-         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
-     $                LDWORK )
-         IE = IL + LDWORK*M
-         ITAUQ = IE + M
-         ITAUP = ITAUQ + M
-         IWORK = ITAUP + M
-*
-*        Bidiagonalize L in WORK(IL)
-*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
-*
-         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
-     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
-     $                LWORK-IWORK+1, INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors of L
-*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
-     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
-     $                LWORK-IWORK+1, INFO )
-*
-*        Generate right bidiagonalizing vectors of R in WORK(IL)
-*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
-*
-         CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
-     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
-         IWORK = IE + M
-*
-*        Perform bidiagonal QR iteration,
-*           computing right singular vectors of L in WORK(IL) and
-*           multiplying B by transpose of left singular vectors
-*        (Workspace: need M*M+M+BDSPAC)
-*
-         CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
-     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
-         IF( INFO.NE.0 )
-     $      GO TO 70
-*
-*        Multiply B by reciprocals of singular values
-*
-         THR = MAX( RCOND*S( 1 ), SFMIN )
-         IF( RCOND.LT.ZERO )
-     $      THR = MAX( EPS*S( 1 ), SFMIN )
-         RANK = 0
-         DO 30 I = 1, M
-            IF( S( I ).GT.THR ) THEN
-               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
-               RANK = RANK + 1
-            ELSE
-               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
-            END IF
-   30    CONTINUE
-         IWORK = IE
-*
-*        Multiply B by right singular vectors of L in WORK(IL)
-*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
-*
-         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
-            CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
-     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
-            CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
-         ELSE IF( NRHS.GT.1 ) THEN
-            CHUNK = ( LWORK-IWORK+1 ) / M
-            DO 40 I = 1, NRHS, CHUNK
-               BL = MIN( NRHS-I+1, CHUNK )
-               CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
-     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
-               CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
-     $                      LDB )
-   40       CONTINUE
-         ELSE
-            CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
-     $                  1, ZERO, WORK( IWORK ), 1 )
-            CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
-         END IF
-*
-*        Zero out below first M rows of B
-*
-         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
-         IWORK = ITAU + M
-*
-*        Multiply transpose(Q) by B
-*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
-*
-         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
-     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
-*
-      ELSE
-*
-*        Path 2 - remaining underdetermined cases
-*
-         IE = 1
-         ITAUQ = IE + M
-         ITAUP = ITAUQ + M
-         IWORK = ITAUP + M
-*
-*        Bidiagonalize A
-*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
-*
-         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
-     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
-     $                INFO )
-*
-*        Multiply B by transpose of left bidiagonalizing vectors
-*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
-*
-         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
-     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
-*
-*        Generate right bidiagonalizing vectors in A
-*        (Workspace: need 4*M, prefer 3*M+M*NB)
-*
-         CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
-     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
-         IWORK = IE + M
-*
-*        Perform bidiagonal QR iteration,
-*           computing right singular vectors of A in A and
-*           multiplying B by transpose of left singular vectors
-*        (Workspace: need BDSPAC)
-*
-         CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
-     $                1, B, LDB, WORK( IWORK ), INFO )
-         IF( INFO.NE.0 )
-     $      GO TO 70
-*
-*        Multiply B by reciprocals of singular values
-*
-         THR = MAX( RCOND*S( 1 ), SFMIN )
-         IF( RCOND.LT.ZERO )
-     $      THR = MAX( EPS*S( 1 ), SFMIN )
-         RANK = 0
-         DO 50 I = 1, M
-            IF( S( I ).GT.THR ) THEN
-               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
-               RANK = RANK + 1
-            ELSE
-               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
-            END IF
-   50    CONTINUE
-*
-*        Multiply B by right singular vectors of A
-*        (Workspace: need N, prefer N*NRHS)
-*
-         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
-            CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
-     $                  WORK, LDB )
-            CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
-         ELSE IF( NRHS.GT.1 ) THEN
-            CHUNK = LWORK / N
-            DO 60 I = 1, NRHS, CHUNK
-               BL = MIN( NRHS-I+1, CHUNK )
-               CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
-     $                     LDB, ZERO, WORK, N )
-               CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
-   60       CONTINUE
-         ELSE
-            CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
-            CALL DCOPY( N, WORK, 1, B, 1 )
-         END IF
-      END IF
-*
-*     Undo scaling
-*
-      IF( IASCL.EQ.1 ) THEN
-         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
-         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
-     $                INFO )
-      ELSE IF( IASCL.EQ.2 ) THEN
-         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
-         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
-     $                INFO )
-      END IF
-      IF( IBSCL.EQ.1 ) THEN
-         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
-      ELSE IF( IBSCL.EQ.2 ) THEN
-         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
-      END IF
-*
-   70 CONTINUE
-      WORK( 1 ) = MAXWRK
-      RETURN
-*
-*     End of DGELSS
-*
-      END
--- a/lib/linalg/dgemm.cpp
+++ b/lib/linalg/dgemm.cpp
@ -0,0 +1,173 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dgemm_(char *transa, char *transb, integer *m, integer *n, integer *k, doublereal *alpha,
+           doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta,
+           doublereal *c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    integer i__, j, l, info;
+    logical nota, notb;
+    doublereal temp;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    integer nrowa, nrowb;
+    extern int xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1);
+    notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1);
+    if (nota) {
+        nrowa = *m;
+    } else {
+        nrowa = *k;
+    }
+    if (notb) {
+        nrowb = *k;
+    } else {
+        nrowb = *n;
+    }
+    info = 0;
+    if (!nota && !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1) &&
+        !lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1)) {
+        info = 1;
+    } else if (!notb && !lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1) &&
+               !lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen)1)) {
+        info = 2;
+    } else if (*m < 0) {
+        info = 3;
+    } else if (*n < 0) {
+        info = 4;
+    } else if (*k < 0) {
+        info = 5;
+    } else if (*lda < max(1, nrowa)) {
+        info = 8;
+    } else if (*ldb < max(1, nrowb)) {
+        info = 10;
+    } else if (*ldc < max(1, *m)) {
+        info = 13;
+    }
+    if (info != 0) {
+        xerbla_((char *)"DGEMM ", &info, (ftnlen)6);
+        return 0;
+    }
+    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
+        return 0;
+    }
+    if (*alpha == 0.) {
+        if (*beta == 0.) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    c__[i__ + j * c_dim1] = 0.;
+                }
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+                }
+            }
+        }
+        return 0;
+    }
+    if (notb) {
+        if (nota) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                if (*beta == 0.) {
+                    i__2 = *m;
+                    for (i__ = 1; i__ <= i__2; ++i__) {
+                        c__[i__ + j * c_dim1] = 0.;
+                    }
+                } else if (*beta != 1.) {
+                    i__2 = *m;
+                    for (i__ = 1; i__ <= i__2; ++i__) {
+                        c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+                    }
+                }
+                i__2 = *k;
+                for (l = 1; l <= i__2; ++l) {
+                    temp = *alpha * b[l + j * b_dim1];
+                    i__3 = *m;
+                    for (i__ = 1; i__ <= i__3; ++i__) {
+                        c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
+                    }
+                }
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    temp = 0.;
+                    i__3 = *k;
+                    for (l = 1; l <= i__3; ++l) {
+                        temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+                    }
+                    if (*beta == 0.) {
+                        c__[i__ + j * c_dim1] = *alpha * temp;
+                    } else {
+                        c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
+                    }
+                }
+            }
+        }
+    } else {
+        if (nota) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                if (*beta == 0.) {
+                    i__2 = *m;
+                    for (i__ = 1; i__ <= i__2; ++i__) {
+                        c__[i__ + j * c_dim1] = 0.;
+                    }
+                } else if (*beta != 1.) {
+                    i__2 = *m;
+                    for (i__ = 1; i__ <= i__2; ++i__) {
+                        c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+                    }
+                }
+                i__2 = *k;
+                for (l = 1; l <= i__2; ++l) {
+                    temp = *alpha * b[j + l * b_dim1];
+                    i__3 = *m;
+                    for (i__ = 1; i__ <= i__3; ++i__) {
+                        c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
+                    }
+                }
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    temp = 0.;
+                    i__3 = *k;
+                    for (l = 1; l <= i__3; ++l) {
+                        temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
+                    }
+                    if (*beta == 0.) {
+                        c__[i__ + j * c_dim1] = *alpha * temp;
+                    } else {
+                        c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgemm.f
+++ b/lib/linalg/dgemm.f
@ -1,379 +0,0 @@
-*> \brief \b DGEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,M,N
-*       CHARACTER TRANSA,TRANSB
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*>
-*> where  op( X ) is one of
-*>
-*>    op( X ) = X   or   op( X ) = X**T,
-*>
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c',  op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>
-*>              TRANSB = 'C' or 'c',  op( B ) = B**T.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array, dimension ( LDC, N )
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,M,N
-      CHARACTER TRANSA,TRANSB
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,J,L,NROWA,NROWB
-      LOGICAL NOTA,NOTB
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     transposed and set  NROWA and NROWB  as the number of rows of  A
-*     and  B  respectively.
-*
-      NOTA = LSAME(TRANSA,'N')
-      NOTB = LSAME(TRANSB,'N')
-      IF (NOTA) THEN
-          NROWA = M
-      ELSE
-          NROWA = K
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 8
-      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
-          INFO = 10
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And if  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B + beta*C.
-*
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B + beta*C
-*
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          END IF
-      ELSE
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B**T + beta*C
-*
-              DO 170 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 130 I = 1,M
-                          C(I,J) = ZERO
-  130                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 140 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  140                 CONTINUE
-                  END IF
-                  DO 160 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 150 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                 CONTINUE
-  160             CONTINUE
-  170         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B**T + beta*C
-*
-              DO 200 J = 1,N
-                  DO 190 I = 1,M
-                      TEMP = ZERO
-                      DO 180 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  180                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  190             CONTINUE
-  200         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DGEMM
-*
-      END
--- a/lib/linalg/dgemv.cpp
+++ b/lib/linalg/dgemv.cpp
@ -0,0 +1,149 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dgemv_(char *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda,
+           doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy,
+           ftnlen trans_len)
+{
+    integer a_dim1, a_offset, i__1, i__2;
+    integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    doublereal temp;
+    integer lenx, leny;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    extern int xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+    info = 0;
+    if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
+        !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
+        info = 1;
+    } else if (*m < 0) {
+        info = 2;
+    } else if (*n < 0) {
+        info = 3;
+    } else if (*lda < max(1, *m)) {
+        info = 6;
+    } else if (*incx == 0) {
+        info = 8;
+    } else if (*incy == 0) {
+        info = 11;
+    }
+    if (info != 0) {
+        xerbla_((char *)"DGEMV ", &info, (ftnlen)6);
+        return 0;
+    }
+    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
+        return 0;
+    }
+    if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
+        lenx = *n;
+        leny = *m;
+    } else {
+        lenx = *m;
+        leny = *n;
+    }
+    if (*incx > 0) {
+        kx = 1;
+    } else {
+        kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+        ky = 1;
+    } else {
+        ky = 1 - (leny - 1) * *incy;
+    }
+    if (*beta != 1.) {
+        if (*incy == 1) {
+            if (*beta == 0.) {
+                i__1 = leny;
+                for (i__ = 1; i__ <= i__1; ++i__) {
+                    y[i__] = 0.;
+                }
+            } else {
+                i__1 = leny;
+                for (i__ = 1; i__ <= i__1; ++i__) {
+                    y[i__] = *beta * y[i__];
+                }
+            }
+        } else {
+            iy = ky;
+            if (*beta == 0.) {
+                i__1 = leny;
+                for (i__ = 1; i__ <= i__1; ++i__) {
+                    y[iy] = 0.;
+                    iy += *incy;
+                }
+            } else {
+                i__1 = leny;
+                for (i__ = 1; i__ <= i__1; ++i__) {
+                    y[iy] = *beta * y[iy];
+                    iy += *incy;
+                }
+            }
+        }
+    }
+    if (*alpha == 0.) {
+        return 0;
+    }
+    if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
+        jx = kx;
+        if (*incy == 1) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                temp = *alpha * x[jx];
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    y[i__] += temp * a[i__ + j * a_dim1];
+                }
+                jx += *incx;
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                temp = *alpha * x[jx];
+                iy = ky;
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    y[iy] += temp * a[i__ + j * a_dim1];
+                    iy += *incy;
+                }
+                jx += *incx;
+            }
+        }
+    } else {
+        jy = ky;
+        if (*incx == 1) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                temp = 0.;
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    temp += a[i__ + j * a_dim1] * x[i__];
+                }
+                y[jy] += *alpha * temp;
+                jy += *incy;
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                temp = 0.;
+                ix = kx;
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    temp += a[i__ + j * a_dim1] * x[ix];
+                    ix += *incx;
+                }
+                y[jy] += *alpha * temp;
+                jy += *incy;
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgemv.f
+++ b/lib/linalg/dgemv.f
@ -1,327 +0,0 @@
-*> \brief \b DGEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array, dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array, dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  DO 90 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  DO 110 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DGEMV
-*
-      END
--- a/lib/linalg/dgeqr2.cpp
+++ b/lib/linalg/dgeqr2.cpp
@ -0,0 +1,54 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+int dgeqr2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+            integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    integer i__, k;
+    doublereal aii;
+    extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, ftnlen),
+        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
+        xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGEQR2", &i__1, (ftnlen)6);
+        return 0;
+    }
+    k = min(*m, *n);
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        i__2 = *m - i__ + 1;
+        i__3 = i__ + 1;
+        dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1, &tau[i__]);
+        if (i__ < *n) {
+            aii = a[i__ + i__ * a_dim1];
+            a[i__ + i__ * a_dim1] = 1.;
+            i__2 = *m - i__ + 1;
+            i__3 = *n - i__;
+            dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[i__],
+                   &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
+            a[i__ + i__ * a_dim1] = aii;
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgeqr2.f
+++ b/lib/linalg/dgeqr2.f
@ -1,198 +0,0 @@
-*> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGEQR2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEQR2 computes a QR factorization of a real m-by-n matrix A:
-*>
-*>    A = Q * ( R ),
-*>            ( 0 )
-*>
-*> where:
-*>
-*>    Q is a m-by-m orthogonal matrix;
-*>    R is an upper-triangular n-by-n matrix;
-*>    0 is a (m-n)-by-n zero matrix, if m > n.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the m by n matrix A.
-*>          On exit, the elements on and above the diagonal of the array
-*>          contain the min(m,n) by n upper trapezoidal matrix R (R is
-*>          upper triangular if m >= n); the elements below the diagonal,
-*>          with the array TAU, represent the orthogonal matrix Q as a
-*>          product of elementary reflectors (see Further Details).
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] TAU
-*> \verbatim
-*>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors (see Further
-*>          Details).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (N)
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0: successful exit
-*>          < 0: if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrix Q is represented as a product of elementary reflectors
-*>
-*>     Q = H(1) H(2) . . . H(k), where k = min(m,n).
-*>
-*>  Each H(i) has the form
-*>
-*>     H(i) = I - tau * v * v**T
-*>
-*>  where tau is a real scalar, and v is a real vector with
-*>  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
-*>  and tau in TAU(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, K
-      DOUBLE PRECISION   AII
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLARF, DLARFG, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGEQR2', -INFO )
-         RETURN
-      END IF
-*
-      K = MIN( M, N )
-*
-      DO 10 I = 1, K
-*
-*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
-*
-         CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
-     $                TAU( I ) )
-         IF( I.LT.N ) THEN
-*
-*           Apply H(i) to A(i:m,i+1:n) from the left
-*
-            AII = A( I, I )
-            A( I, I ) = ONE
-            CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
-     $                  A( I, I+1 ), LDA, WORK )
-            A( I, I ) = AII
-         END IF
-   10 CONTINUE
-      RETURN
-*
-*     End of DGEQR2
-*
-      END
--- a/lib/linalg/dgeqrf.cpp
+++ b/lib/linalg/dgeqrf.cpp
@ -0,0 +1,113 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+int dgeqrf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+            integer *lwork, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern int dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       integer *),
+        dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
+                integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
+                integer *, ftnlen, ftnlen, ftnlen, ftnlen),
+        dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                doublereal *, integer *, ftnlen, ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+    k = min(*m, *n);
+    *info = 0;
+    nb = ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    } else if (!lquery) {
+        if (*lwork <= 0 || *m > 0 && *lwork < max(1, *n)) {
+            *info = -7;
+        }
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGEQRF", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        if (k == 0) {
+            lwkopt = 1;
+        } else {
+            lwkopt = *n * nb;
+        }
+        work[1] = (doublereal)lwkopt;
+        return 0;
+    }
+    if (k == 0) {
+        work[1] = 1.;
+        return 0;
+    }
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+        i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+        nx = max(i__1, i__2);
+        if (nx < k) {
+            ldwork = *n;
+            iws = ldwork * nb;
+            if (*lwork < iws) {
+                nb = *lwork / ldwork;
+                i__1 = 2,
+                i__2 = ilaenv_(&c__2, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+                nbmin = max(i__1, i__2);
+            }
+        }
+    }
+    if (nb >= nbmin && nb < k && nx < k) {
+        i__1 = k - nx;
+        i__2 = nb;
+        for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+            i__3 = k - i__ + 1;
+            ib = min(i__3, nb);
+            i__3 = *m - i__ + 1;
+            dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
+            if (i__ + ib <= *n) {
+                i__3 = *m - i__ + 1;
+                dlarft_((char *)"Forward", (char *)"Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
+                        &work[1], &ldwork, (ftnlen)7, (ftnlen)10);
+                i__3 = *m - i__ + 1;
+                i__4 = *n - i__ - ib + 1;
+                dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib,
+                        &a[i__ + i__ * a_dim1], lda, &work[1], &ldwork,
+                        &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork, (ftnlen)4,
+                        (ftnlen)9, (ftnlen)7, (ftnlen)10);
+            }
+        }
+    } else {
+        i__ = 1;
+    }
+    if (i__ <= k) {
+        i__2 = *m - i__ + 1;
+        i__1 = *n - i__ + 1;
+        dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
+    }
+    work[1] = (doublereal)iws;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgeqrf.f
+++ b/lib/linalg/dgeqrf.f
@ -1,282 +0,0 @@
-*> \brief \b DGEQRF
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGEQRF + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LWORK, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEQRF computes a QR factorization of a real M-by-N matrix A:
-*>
-*>    A = Q * ( R ),
-*>            ( 0 )
-*>
-*> where:
-*>
-*>    Q is a M-by-M orthogonal matrix;
-*>    R is an upper-triangular N-by-N matrix;
-*>    0 is a (M-N)-by-N zero matrix, if M > N.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix A.
-*>          On exit, the elements on and above the diagonal of the array
-*>          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
-*>          upper triangular if m >= n); the elements below the diagonal,
-*>          with the array TAU, represent the orthogonal matrix Q as a
-*>          product of min(m,n) elementary reflectors (see Further
-*>          Details).
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] TAU
-*> \verbatim
-*>          TAU is DOUBLE PRECISION array, dimension (min(M,N))
-*>          The scalar factors of the elementary reflectors (see Further
-*>          Details).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The dimension of the array WORK.
-*>          LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise.
-*>          For optimum performance LWORK >= N*NB, where NB is
-*>          the optimal blocksize.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrix Q is represented as a product of elementary reflectors
-*>
-*>     Q = H(1) H(2) . . . H(k), where k = min(m,n).
-*>
-*>  Each H(i) has the form
-*>
-*>     H(i) = I - tau * v * v**T
-*>
-*>  where tau is a real scalar, and v is a real vector with
-*>  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
-*>  and tau in TAU(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LWORK, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
-     $                   NBMIN, NX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEQR2, DLARFB, DLARFT, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input arguments
-*
-      K = MIN( M, N )
-      INFO = 0
-      NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      ELSE IF( .NOT.LQUERY ) THEN
-         IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
-     $      INFO = -7
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGEQRF', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         IF( K.EQ.0 ) THEN
-            LWKOPT = 1
-         ELSE
-            LWKOPT = N*NB
-         END IF
-         WORK( 1 ) = LWKOPT
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( K.EQ.0 ) THEN
-         WORK( 1 ) = 1
-         RETURN
-      END IF
-*
-      NBMIN = 2
-      NX = 0
-      IWS = N
-      IF( NB.GT.1 .AND. NB.LT.K ) THEN
-*
-*        Determine when to cross over from blocked to unblocked code.
-*
-         NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
-         IF( NX.LT.K ) THEN
-*
-*           Determine if workspace is large enough for blocked code.
-*
-            LDWORK = N
-            IWS = LDWORK*NB
-            IF( LWORK.LT.IWS ) THEN
-*
-*              Not enough workspace to use optimal NB:  reduce NB and
-*              determine the minimum value of NB.
-*
-               NB = LWORK / LDWORK
-               NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
-     $                 -1 ) )
-            END IF
-         END IF
-      END IF
-*
-      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
-*
-*        Use blocked code initially
-*
-         DO 10 I = 1, K - NX, NB
-            IB = MIN( K-I+1, NB )
-*
-*           Compute the QR factorization of the current block
-*           A(i:m,i:i+ib-1)
-*
-            CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
-     $                   IINFO )
-            IF( I+IB.LE.N ) THEN
-*
-*              Form the triangular factor of the block reflector
-*              H = H(i) H(i+1) . . . H(i+ib-1)
-*
-               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
-     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
-*
-*              Apply H**T to A(i:m,i+ib:n) from the left
-*
-               CALL DLARFB( 'Left', 'Transpose', 'Forward',
-     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
-     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
-     $                      LDA, WORK( IB+1 ), LDWORK )
-            END IF
-   10    CONTINUE
-      ELSE
-         I = 1
-      END IF
-*
-*     Use unblocked code to factor the last or only block.
-*
-      IF( I.LE.K )
-     $   CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
-     $                IINFO )
-*
-      WORK( 1 ) = IWS
-      RETURN
-*
-*     End of DGEQRF
-*
-      END
--- a/lib/linalg/dger.cpp
+++ b/lib/linalg/dger.cpp
@ -0,0 +1,77 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y,
+          integer *incy, doublereal *a, integer *lda)
+{
+    integer a_dim1, a_offset, i__1, i__2;
+    integer i__, j, ix, jy, kx, info;
+    doublereal temp;
+    extern int xerbla_(char *, integer *, ftnlen);
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    info = 0;
+    if (*m < 0) {
+        info = 1;
+    } else if (*n < 0) {
+        info = 2;
+    } else if (*incx == 0) {
+        info = 5;
+    } else if (*incy == 0) {
+        info = 7;
+    } else if (*lda < max(1, *m)) {
+        info = 9;
+    }
+    if (info != 0) {
+        xerbla_((char *)"DGER  ", &info, (ftnlen)6);
+        return 0;
+    }
+    if (*m == 0 || *n == 0 || *alpha == 0.) {
+        return 0;
+    }
+    if (*incy > 0) {
+        jy = 1;
+    } else {
+        jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            if (y[jy] != 0.) {
+                temp = *alpha * y[jy];
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    a[i__ + j * a_dim1] += x[i__] * temp;
+                }
+            }
+            jy += *incy;
+        }
+    } else {
+        if (*incx > 0) {
+            kx = 1;
+        } else {
+            kx = 1 - (*m - 1) * *incx;
+        }
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            if (y[jy] != 0.) {
+                temp = *alpha * y[jy];
+                ix = kx;
+                i__2 = *m;
+                for (i__ = 1; i__ <= i__2; ++i__) {
+                    a[i__ + j * a_dim1] += x[ix] * temp;
+                    ix += *incx;
+                }
+            }
+            jy += *incy;
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dger.f
+++ b/lib/linalg/dger.f
@ -1,224 +0,0 @@
-*> \brief \b DGER
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGER   performs the rank 1 operation
-*>
-*>    A := alpha*x*y**T + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array, dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array, dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGER  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of DGER
-*
-      END
--- a/lib/linalg/dgesv.cpp
+++ b/lib/linalg/dgesv.cpp
@ -0,0 +1,44 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b,
+           integer *ldb, integer *info)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+    extern int dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *),
+        xerbla_(char *, integer *, ftnlen),
+        dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *,
+                integer *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    *info = 0;
+    if (*n < 0) {
+        *info = -1;
+    } else if (*nrhs < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *n)) {
+        *info = -4;
+    } else if (*ldb < max(1, *n)) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGESV ", &i__1, (ftnlen)6);
+        return 0;
+    }
+    dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+        dgetrs_((char *)"No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info,
+                (ftnlen)12);
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgesv.f
+++ b/lib/linalg/dgesv.f
@ -1,176 +0,0 @@
-*> \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b>
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGESV + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LDB, N, NRHS
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGESV computes the solution to a real system of linear equations
-*>    A * X = B,
-*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
-*>
-*> The LU decomposition with partial pivoting and row interchanges is
-*> used to factor A as
-*>    A = P * L * U,
-*> where P is a permutation matrix, L is unit lower triangular, and U is
-*> upper triangular.  The factored form of A is then used to solve the
-*> system of equations A * X = B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of linear equations, i.e., the order of the
-*>          matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] NRHS
-*> \verbatim
-*>          NRHS is INTEGER
-*>          The number of right hand sides, i.e., the number of columns
-*>          of the matrix B.  NRHS >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the N-by-N coefficient matrix A.
-*>          On exit, the factors L and U from the factorization
-*>          A = P*L*U; the unit diagonal elements of L are not stored.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[out] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (N)
-*>          The pivot indices that define the permutation matrix P;
-*>          row i of the matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
-*>          On entry, the N-by-NRHS matrix of right hand side matrix B.
-*>          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>          The leading dimension of the array B.  LDB >= max(1,N).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*>          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
-*>                has been completed, but the factor U is exactly
-*>                singular, so the solution could not be computed.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEsolve
-*
-*  =====================================================================
-      SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-*
-*  -- LAPACK driver routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LDB, N, NRHS
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Subroutines ..
-      EXTERNAL           DGETRF, DGETRS, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF( N.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( NRHS.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
-         INFO = -4
-      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
-         INFO = -7
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGESV ', -INFO )
-         RETURN
-      END IF
-*
-*     Compute the LU factorization of A.
-*
-      CALL DGETRF( N, N, A, LDA, IPIV, INFO )
-      IF( INFO.EQ.0 ) THEN
-*
-*        Solve the system A*X = B, overwriting B with X.
-*
-         CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
-     $                INFO )
-      END IF
-      RETURN
-*
-*     End of DGESV
-*
-      END
--- a/lib/linalg/dgesvd.cpp
+++ b/lib/linalg/dgesvd.cpp
--- a/lib/linalg/dgesvd.f
+++ b/lib/linalg/dgesvd.f
--- a/lib/linalg/dgetf2.cpp
+++ b/lib/linalg/dgetf2.cpp
@ -0,0 +1,76 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static doublereal c_b8 = -1.;
+int dgetf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+    integer i__, j, jp;
+    extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
+                     integer *, doublereal *, integer *),
+        dscal_(integer *, doublereal *, doublereal *, integer *);
+    doublereal sfmin;
+    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
+    extern doublereal dlamch_(char *, ftnlen);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern int xerbla_(char *, integer *, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGETF2", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*m == 0 || *n == 0) {
+        return 0;
+    }
+    sfmin = dlamch_((char *)"S", (ftnlen)1);
+    i__1 = min(*m, *n);
+    for (j = 1; j <= i__1; ++j) {
+        i__2 = *m - j + 1;
+        jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
+        ipiv[j] = jp;
+        if (a[jp + j * a_dim1] != 0.) {
+            if (jp != j) {
+                dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+            }
+            if (j < *m) {
+                if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) {
+                    i__2 = *m - j;
+                    d__1 = 1. / a[j + j * a_dim1];
+                    dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
+                } else {
+                    i__2 = *m - j;
+                    for (i__ = 1; i__ <= i__2; ++i__) {
+                        a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
+                    }
+                }
+            }
+        } else if (*info == 0) {
+            *info = j;
+        }
+        if (j < min(*m, *n)) {
+            i__2 = *m - j;
+            i__3 = *n - j;
+            dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (j + 1) * a_dim1], lda,
+                  &a[j + 1 + (j + 1) * a_dim1], lda);
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgetf2.f
+++ b/lib/linalg/dgetf2.f
@ -1,210 +0,0 @@
-*> \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm).
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGETF2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGETF2 computes an LU factorization of a general m-by-n matrix A
-*> using partial pivoting with row interchanges.
-*>
-*> The factorization has the form
-*>    A = P * L * U
-*> where P is a permutation matrix, L is lower triangular with unit
-*> diagonal elements (lower trapezoidal if m > n), and U is upper
-*> triangular (upper trapezoidal if m < n).
-*>
-*> This is the right-looking Level 2 BLAS version of the algorithm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the m by n matrix to be factored.
-*>          On exit, the factors L and U from the factorization
-*>          A = P*L*U; the unit diagonal elements of L are not stored.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (min(M,N))
-*>          The pivot indices; for 1 <= i <= min(M,N), row i of the
-*>          matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0: successful exit
-*>          < 0: if INFO = -k, the k-th argument had an illegal value
-*>          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
-*>               has been completed, but the factor U is exactly
-*>               singular, and division by zero will occur if it is used
-*>               to solve a system of equations.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE, ZERO
-      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION   SFMIN
-      INTEGER            I, J, JP
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH
-      INTEGER            IDAMAX
-      EXTERNAL           DLAMCH, IDAMAX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGER, DSCAL, DSWAP, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGETF2', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( M.EQ.0 .OR. N.EQ.0 )
-     $   RETURN
-*
-*     Compute machine safe minimum
-*
-      SFMIN = DLAMCH('S')
-*
-      DO 10 J = 1, MIN( M, N )
-*
-*        Find pivot and test for singularity.
-*
-         JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
-         IPIV( J ) = JP
-         IF( A( JP, J ).NE.ZERO ) THEN
-*
-*           Apply the interchange to columns 1:N.
-*
-            IF( JP.NE.J )
-     $         CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
-*
-*           Compute elements J+1:M of J-th column.
-*
-            IF( J.LT.M ) THEN
-               IF( ABS(A( J, J )) .GE. SFMIN ) THEN
-                  CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
-               ELSE
-                 DO 20 I = 1, M-J
-                    A( J+I, J ) = A( J+I, J ) / A( J, J )
-   20            CONTINUE
-               END IF
-            END IF
-*
-         ELSE IF( INFO.EQ.0 ) THEN
-*
-            INFO = J
-         END IF
-*
-         IF( J.LT.MIN( M, N ) ) THEN
-*
-*           Update trailing submatrix.
-*
-            CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
-     $                 A( J+1, J+1 ), LDA )
-         END IF
-   10 CONTINUE
-      RETURN
-*
-*     End of DGETF2
-*
-      END
--- a/lib/linalg/dgetrf.cpp
+++ b/lib/linalg/dgetrf.cpp
@ -0,0 +1,90 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static doublereal c_b16 = 1.;
+static doublereal c_b19 = -1.;
+int dgetrf_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    integer i__, j, jb, nb;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen);
+    integer iinfo;
+    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
+                      ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    extern int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *,
+                       integer *),
+        dgetrf2_(integer *, integer *, doublereal *, integer *, integer *, integer *);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGETRF", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*m == 0 || *n == 0) {
+        return 0;
+    }
+    nb = ilaenv_(&c__1, (char *)"DGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+    if (nb <= 1 || nb >= min(*m, *n)) {
+        dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+        i__1 = min(*m, *n);
+        i__2 = nb;
+        for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+            i__3 = min(*m, *n) - j + 1;
+            jb = min(i__3, nb);
+            i__3 = *m - j + 1;
+            dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+            if (*info == 0 && iinfo > 0) {
+                *info = iinfo + j - 1;
+            }
+            i__4 = *m, i__5 = j + jb - 1;
+            i__3 = min(i__4, i__5);
+            for (i__ = j; i__ <= i__3; ++i__) {
+                ipiv[i__] = j - 1 + ipiv[i__];
+            }
+            i__3 = j - 1;
+            i__4 = j + jb - 1;
+            dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+            if (j + jb <= *n) {
+                i__3 = *n - j - jb + 1;
+                i__4 = j + jb - 1;
+                dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1);
+                i__3 = *n - j - jb + 1;
+                dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b16,
+                       &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4,
+                       (ftnlen)5, (ftnlen)12, (ftnlen)4);
+                if (j + jb <= *m) {
+                    i__3 = *m - j - jb + 1;
+                    i__4 = *n - j - jb + 1;
+                    dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &c_b19,
+                           &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16,
+                           &a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12);
+                }
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgetrf.f
+++ b/lib/linalg/dgetrf.f
@ -1,222 +0,0 @@
-*> \brief \b DGETRF
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGETRF + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGETRF computes an LU factorization of a general M-by-N matrix A
-*> using partial pivoting with row interchanges.
-*>
-*> The factorization has the form
-*>    A = P * L * U
-*> where P is a permutation matrix, L is lower triangular with unit
-*> diagonal elements (lower trapezoidal if m > n), and U is upper
-*> triangular (upper trapezoidal if m < n).
-*>
-*> This is the right-looking Level 3 BLAS version of the algorithm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix to be factored.
-*>          On exit, the factors L and U from the factorization
-*>          A = P*L*U; the unit diagonal elements of L are not stored.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (min(M,N))
-*>          The pivot indices; for 1 <= i <= min(M,N), row i of the
-*>          matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
-*>                has been completed, but the factor U is exactly
-*>                singular, and division by zero will occur if it is used
-*>                to solve a system of equations.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, IINFO, J, JB, NB
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGETRF', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( M.EQ.0 .OR. N.EQ.0 )
-     $   RETURN
-*
-*     Determine the block size for this environment.
-*
-      NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
-      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
-*
-*        Use unblocked code.
-*
-         CALL DGETRF2( M, N, A, LDA, IPIV, INFO )
-      ELSE
-*
-*        Use blocked code.
-*
-         DO 20 J = 1, MIN( M, N ), NB
-            JB = MIN( MIN( M, N )-J+1, NB )
-*
-*           Factor diagonal and subdiagonal blocks and test for exact
-*           singularity.
-*
-            CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
-*
-*           Adjust INFO and the pivot indices.
-*
-            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
-     $         INFO = IINFO + J - 1
-            DO 10 I = J, MIN( M, J+JB-1 )
-               IPIV( I ) = J - 1 + IPIV( I )
-   10       CONTINUE
-*
-*           Apply interchanges to columns 1:J-1.
-*
-            CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
-*
-            IF( J+JB.LE.N ) THEN
-*
-*              Apply interchanges to columns J+JB:N.
-*
-               CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
-     $                      IPIV, 1 )
-*
-*              Compute block row of U.
-*
-               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
-     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
-     $                     LDA )
-               IF( J+JB.LE.M ) THEN
-*
-*                 Update trailing submatrix.
-*
-                  CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
-     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
-     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
-     $                        LDA )
-               END IF
-            END IF
-   20    CONTINUE
-      END IF
-      RETURN
-*
-*     End of DGETRF
-*
-      END
--- a/lib/linalg/dgetrf2.cpp
+++ b/lib/linalg/dgetrf2.cpp
@ -0,0 +1,106 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static doublereal c_b13 = 1.;
+static doublereal c_b16 = -1.;
+int dgetrf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1;
+    integer i__, n1, n2;
+    doublereal temp;
+    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
+        dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+               integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen,
+               ftnlen);
+    integer iinfo;
+    doublereal sfmin;
+    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
+                      ftnlen);
+    extern doublereal dlamch_(char *, ftnlen);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern int xerbla_(char *, integer *, ftnlen),
+        dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*lda < max(1, *m)) {
+        *info = -4;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGETRF2", &i__1, (ftnlen)7);
+        return 0;
+    }
+    if (*m == 0 || *n == 0) {
+        return 0;
+    }
+    if (*m == 1) {
+        ipiv[1] = 1;
+        if (a[a_dim1 + 1] == 0.) {
+            *info = 1;
+        }
+    } else if (*n == 1) {
+        sfmin = dlamch_((char *)"S", (ftnlen)1);
+        i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
+        ipiv[1] = i__;
+        if (a[i__ + a_dim1] != 0.) {
+            if (i__ != 1) {
+                temp = a[a_dim1 + 1];
+                a[a_dim1 + 1] = a[i__ + a_dim1];
+                a[i__ + a_dim1] = temp;
+            }
+            if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
+                i__1 = *m - 1;
+                d__1 = 1. / a[a_dim1 + 1];
+                dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
+            } else {
+                i__1 = *m - 1;
+                for (i__ = 1; i__ <= i__1; ++i__) {
+                    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
+                }
+            }
+        } else {
+            *info = 1;
+        }
+    } else {
+        n1 = min(*m, *n) / 2;
+        n2 = *n - n1;
+        dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
+        if (*info == 0 && iinfo > 0) {
+            *info = iinfo;
+        }
+        dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1);
+        dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1],
+               lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1);
+        i__1 = *m - n1;
+        dgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda,
+               &a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * a_dim1], lda,
+               (ftnlen)1, (ftnlen)1);
+        i__1 = *m - n1;
+        dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo);
+        if (*info == 0 && iinfo > 0) {
+            *info = iinfo + n1;
+        }
+        i__1 = min(*m, *n);
+        for (i__ = n1 + 1; i__ <= i__1; ++i__) {
+            ipiv[i__] += n1;
+        }
+        i__1 = n1 + 1;
+        i__2 = min(*m, *n);
+        dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgetrf2.f
+++ b/lib/linalg/dgetrf2.f
@ -1,269 +0,0 @@
-*> \brief \b DGETRF2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGETRF2 computes an LU factorization of a general M-by-N matrix A
-*> using partial pivoting with row interchanges.
-*>
-*> The factorization has the form
-*>    A = P * L * U
-*> where P is a permutation matrix, L is lower triangular with unit
-*> diagonal elements (lower trapezoidal if m > n), and U is upper
-*> triangular (upper trapezoidal if m < n).
-*>
-*> This is the recursive version of the algorithm. It divides
-*> the matrix into four submatrices:
-*>
-*>        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2
-*>    A = [ -----|----- ]  with n1 = min(m,n)/2
-*>        [  A21 | A22  ]       n2 = n-n1
-*>
-*>                                       [ A11 ]
-*> The subroutine calls itself to factor [ --- ],
-*>                                       [ A12 ]
-*>                 [ A12 ]
-*> do the swaps on [ --- ], solve A12, update A22,
-*>                 [ A22 ]
-*>
-*> then calls itself to factor A22 and do the swaps on A21.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the M-by-N matrix to be factored.
-*>          On exit, the factors L and U from the factorization
-*>          A = P*L*U; the unit diagonal elements of L are not stored.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (min(M,N))
-*>          The pivot indices; for 1 <= i <= min(M,N), row i of the
-*>          matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
-*>                has been completed, but the factor U is exactly
-*>                singular, and division by zero will occur if it is used
-*>                to solve a system of equations.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE, ZERO
-      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION   SFMIN, TEMP
-      INTEGER            I, IINFO, N1, N2
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH
-      INTEGER            IDAMAX
-      EXTERNAL           DLAMCH, IDAMAX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEMM, DSCAL, DLASWP, DTRSM, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters
-*
-      INFO = 0
-      IF( M.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
-         INFO = -4
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGETRF2', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( M.EQ.0 .OR. N.EQ.0 )
-     $   RETURN
-
-      IF ( M.EQ.1 ) THEN
-*
-*        Use unblocked code for one row case
-*        Just need to handle IPIV and INFO
-*
-         IPIV( 1 ) = 1
-         IF ( A(1,1).EQ.ZERO )
-     $      INFO = 1
-*
-      ELSE IF( N.EQ.1 ) THEN
-*
-*        Use unblocked code for one column case
-*
-*
-*        Compute machine safe minimum
-*
-         SFMIN = DLAMCH('S')
-*
-*        Find pivot and test for singularity
-*
-         I = IDAMAX( M, A( 1, 1 ), 1 )
-         IPIV( 1 ) = I
-         IF( A( I, 1 ).NE.ZERO ) THEN
-*
-*           Apply the interchange
-*
-            IF( I.NE.1 ) THEN
-               TEMP = A( 1, 1 )
-               A( 1, 1 ) = A( I, 1 )
-               A( I, 1 ) = TEMP
-            END IF
-*
-*           Compute elements 2:M of the column
-*
-            IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN
-               CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
-            ELSE
-               DO 10 I = 1, M-1
-                  A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 )
-   10          CONTINUE
-            END IF
-*
-         ELSE
-            INFO = 1
-         END IF
-*
-      ELSE
-*
-*        Use recursive code
-*
-         N1 = MIN( M, N ) / 2
-         N2 = N-N1
-*
-*               [ A11 ]
-*        Factor [ --- ]
-*               [ A21 ]
-*
-         CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO )
-
-         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
-     $      INFO = IINFO
-*
-*                              [ A12 ]
-*        Apply interchanges to [ --- ]
-*                              [ A22 ]
-*
-         CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 )
-*
-*        Solve A12
-*
-         CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
-     $               A( 1, N1+1 ), LDA )
-*
-*        Update A22
-*
-         CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
-     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
-*
-*        Factor A22
-*
-         CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ),
-     $                 IINFO )
-*
-*        Adjust INFO and the pivot indices
-*
-         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
-     $      INFO = IINFO + N1
-         DO 20 I = N1+1, MIN( M, N )
-            IPIV( I ) = IPIV( I ) + N1
-   20    CONTINUE
-*
-*        Apply interchanges to A21
-*
-         CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 )
-*
-      END IF
-      RETURN
-*
-*     End of DGETRF2
-*
-      END
--- a/lib/linalg/dgetri.cpp
+++ b/lib/linalg/dgetri.cpp
@ -0,0 +1,125 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+static doublereal c_b20 = -1.;
+static doublereal c_b22 = 1.;
+int dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work,
+            integer *lwork, integer *info)
+{
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    integer i__, j, jb, nb, jj, jp, nn, iws;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen),
+        dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
+               integer *, doublereal *, doublereal *, integer *, ftnlen);
+    integer nbmin;
+    extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *),
+        dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
+               integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    integer ldwork;
+    extern int dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen,
+                       ftnlen);
+    integer lwkopt;
+    logical lquery;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    --work;
+    *info = 0;
+    nb = ilaenv_(&c__1, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = *n * nb;
+    work[1] = (doublereal)lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+        *info = -1;
+    } else if (*lda < max(1, *n)) {
+        *info = -3;
+    } else if (*lwork < max(1, *n) && !lquery) {
+        *info = -6;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGETRI", &i__1, (ftnlen)6);
+        return 0;
+    } else if (lquery) {
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    dtrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8);
+    if (*info > 0) {
+        return 0;
+    }
+    nbmin = 2;
+    ldwork = *n;
+    if (nb > 1 && nb < *n) {
+        i__1 = ldwork * nb;
+        iws = max(i__1, 1);
+        if (*lwork < iws) {
+            nb = *lwork / ldwork;
+            i__1 = 2,
+            i__2 = ilaenv_(&c__2, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+            nbmin = max(i__1, i__2);
+        }
+    } else {
+        iws = *n;
+    }
+    if (nb < nbmin || nb >= *n) {
+        for (j = *n; j >= 1; --j) {
+            i__1 = *n;
+            for (i__ = j + 1; i__ <= i__1; ++i__) {
+                work[i__] = a[i__ + j * a_dim1];
+                a[i__ + j * a_dim1] = 0.;
+            }
+            if (j < *n) {
+                i__1 = *n - j;
+                dgemv_((char *)"No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda,
+                       &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1, (ftnlen)12);
+            }
+        }
+    } else {
+        nn = (*n - 1) / nb * nb + 1;
+        i__1 = -nb;
+        for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
+            i__2 = nb, i__3 = *n - j + 1;
+            jb = min(i__2, i__3);
+            i__2 = j + jb - 1;
+            for (jj = j; jj <= i__2; ++jj) {
+                i__3 = *n;
+                for (i__ = jj + 1; i__ <= i__3; ++i__) {
+                    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
+                    a[i__ + jj * a_dim1] = 0.;
+                }
+            }
+            if (j + jb <= *n) {
+                i__2 = *n - j - jb + 1;
+                dgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &c_b20,
+                       &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b22,
+                       &a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12);
+            }
+            dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b22, &work[j], &ldwork,
+                   &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4);
+        }
+    }
+    for (j = *n - 1; j >= 1; --j) {
+        jp = ipiv[j];
+        if (jp != j) {
+            dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
+        }
+    }
+    work[1] = (doublereal)iws;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgetri.f
+++ b/lib/linalg/dgetri.f
@ -1,258 +0,0 @@
-*> \brief \b DGETRI
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGETRI + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, LDA, LWORK, N
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGETRI computes the inverse of a matrix using the LU factorization
-*> computed by DGETRF.
-*>
-*> This method inverts U and then computes inv(A) by solving the system
-*> inv(A)*L = inv(U) for inv(A).
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the factors L and U from the factorization
-*>          A = P*L*U as computed by DGETRF.
-*>          On exit, if INFO = 0, the inverse of the original matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (N)
-*>          The pivot indices from DGETRF; for 1<=i<=N, row i of the
-*>          matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*>          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
-*> \endverbatim
-*>
-*> \param[in] LWORK
-*> \verbatim
-*>          LWORK is INTEGER
-*>          The dimension of the array WORK.  LWORK >= max(1,N).
-*>          For optimal performance LWORK >= N*NB, where NB is
-*>          the optimal blocksize returned by ILAENV.
-*>
-*>          If LWORK = -1, then a workspace query is assumed; the routine
-*>          only calculates the optimal size of the WORK array, returns
-*>          this value as the first entry of the WORK array, and no error
-*>          message related to LWORK is issued by XERBLA.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*>          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
-*>                singular and its inverse could not be computed.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, LDA, LWORK, N
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            LQUERY
-      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
-     $                   NBMIN, NN
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
-      LWKOPT = N*NB
-      WORK( 1 ) = LWKOPT
-      LQUERY = ( LWORK.EQ.-1 )
-      IF( N.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
-         INFO = -3
-      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
-         INFO = -6
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGETRI', -INFO )
-         RETURN
-      ELSE IF( LQUERY ) THEN
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( N.EQ.0 )
-     $   RETURN
-*
-*     Form inv(U).  If INFO > 0 from DTRTRI, then U is singular,
-*     and the inverse is not computed.
-*
-      CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
-      IF( INFO.GT.0 )
-     $   RETURN
-*
-      NBMIN = 2
-      LDWORK = N
-      IF( NB.GT.1 .AND. NB.LT.N ) THEN
-         IWS = MAX( LDWORK*NB, 1 )
-         IF( LWORK.LT.IWS ) THEN
-            NB = LWORK / LDWORK
-            NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
-         END IF
-      ELSE
-         IWS = N
-      END IF
-*
-*     Solve the equation inv(A)*L = inv(U) for inv(A).
-*
-      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
-*
-*        Use unblocked code.
-*
-         DO 20 J = N, 1, -1
-*
-*           Copy current column of L to WORK and replace with zeros.
-*
-            DO 10 I = J + 1, N
-               WORK( I ) = A( I, J )
-               A( I, J ) = ZERO
-   10       CONTINUE
-*
-*           Compute current column of inv(A).
-*
-            IF( J.LT.N )
-     $         CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
-     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
-   20    CONTINUE
-      ELSE
-*
-*        Use blocked code.
-*
-         NN = ( ( N-1 ) / NB )*NB + 1
-         DO 50 J = NN, 1, -NB
-            JB = MIN( NB, N-J+1 )
-*
-*           Copy current block column of L to WORK and replace with
-*           zeros.
-*
-            DO 40 JJ = J, J + JB - 1
-               DO 30 I = JJ + 1, N
-                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
-                  A( I, JJ ) = ZERO
-   30          CONTINUE
-   40       CONTINUE
-*
-*           Compute current block column of inv(A).
-*
-            IF( J+JB.LE.N )
-     $         CALL DGEMM( 'No transpose', 'No transpose', N, JB,
-     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
-     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
-            CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
-     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
-   50    CONTINUE
-      END IF
-*
-*     Apply column interchanges.
-*
-      DO 60 J = N - 1, 1, -1
-         JP = IPIV( J )
-         IF( JP.NE.J )
-     $      CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
-   60 CONTINUE
-*
-      WORK( 1 ) = IWS
-      RETURN
-*
-*     End of DGETRI
-*
-      END
--- a/lib/linalg/dgetrs.cpp
+++ b/lib/linalg/dgetrs.cpp
@ -0,0 +1,65 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static doublereal c_b12 = 1.;
+static integer c_n1 = -1;
+int dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv,
+            doublereal *b, integer *ldb, integer *info, ftnlen trans_len)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
+                      doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
+                      ftnlen),
+        xerbla_(char *, integer *, ftnlen),
+        dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
+    logical notran;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    *info = 0;
+    notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1);
+    if (!notran && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
+        !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*nrhs < 0) {
+        *info = -3;
+    } else if (*lda < max(1, *n)) {
+        *info = -5;
+    } else if (*ldb < max(1, *n)) {
+        *info = -8;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DGETRS", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0 || *nrhs == 0) {
+        return 0;
+    }
+    if (notran) {
+        dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+        dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
+               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)4);
+        dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
+               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)8);
+    } else {
+        dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
+               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)8);
+        dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
+               &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)4);
+        dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dgetrs.f
+++ b/lib/linalg/dgetrs.f
@ -1,222 +0,0 @@
-*> \brief \b DGETRS
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DGETRS + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-*
-*       .. Scalar Arguments ..
-*       CHARACTER          TRANS
-*       INTEGER            INFO, LDA, LDB, N, NRHS
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IPIV( * )
-*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGETRS solves a system of linear equations
-*>    A * X = B  or  A**T * X = B
-*> with a general N-by-N matrix A using the LU factorization computed
-*> by DGETRF.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>          Specifies the form of the system of equations:
-*>          = 'N':  A * X = B  (No transpose)
-*>          = 'T':  A**T* X = B  (Transpose)
-*>          = 'C':  A**T* X = B  (Conjugate transpose = Transpose)
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] NRHS
-*> \verbatim
-*>          NRHS is INTEGER
-*>          The number of right hand sides, i.e., the number of columns
-*>          of the matrix B.  NRHS >= 0.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          The factors L and U from the factorization A = P*L*U
-*>          as computed by DGETRF.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in] IPIV
-*> \verbatim
-*>          IPIV is INTEGER array, dimension (N)
-*>          The pivot indices from DGETRF; for 1<=i<=N, row i of the
-*>          matrix was interchanged with row IPIV(i).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
-*>          On entry, the right hand side matrix B.
-*>          On exit, the solution matrix X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>          The leading dimension of the array B.  LDB >= max(1,N).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleGEcomputational
-*
-*  =====================================================================
-      SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      CHARACTER          TRANS
-      INTEGER            INFO, LDA, LDB, N, NRHS
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IPIV( * )
-      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            NOTRAN
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME
-      EXTERNAL           LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASWP, DTRSM, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      NOTRAN = LSAME( TRANS, 'N' )
-      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
-     $    LSAME( TRANS, 'C' ) ) THEN
-         INFO = -1
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( NRHS.LT.0 ) THEN
-         INFO = -3
-      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
-         INFO = -5
-      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
-         INFO = -8
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DGETRS', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( N.EQ.0 .OR. NRHS.EQ.0 )
-     $   RETURN
-*
-      IF( NOTRAN ) THEN
-*
-*        Solve A * X = B.
-*
-*        Apply row interchanges to the right hand sides.
-*
-         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
-*
-*        Solve L*X = B, overwriting B with X.
-*
-         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
-     $               ONE, A, LDA, B, LDB )
-*
-*        Solve U*X = B, overwriting B with X.
-*
-         CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
-     $               NRHS, ONE, A, LDA, B, LDB )
-      ELSE
-*
-*        Solve A**T * X = B.
-*
-*        Solve U**T *X = B, overwriting B with X.
-*
-         CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
-     $               ONE, A, LDA, B, LDB )
-*
-*        Solve L**T *X = B, overwriting B with X.
-*
-         CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
-     $               A, LDA, B, LDB )
-*
-*        Apply row interchanges to the solution vectors.
-*
-         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
-      END IF
-*
-      RETURN
-*
-*     End of DGETRS
-*
-      END
--- a/lib/linalg/disnan.cpp
+++ b/lib/linalg/disnan.cpp
@ -0,0 +1,14 @@
+
+#include <cmath>
+
+extern "C" {
+
+#include "lmp_f2c.h"
+
+logical disnan_(const doublereal *din)
+{
+    if (!din) return TRUE_;
+
+    return std::isnan(*din) ? TRUE_ : FALSE_;
+}
+}
--- a/lib/linalg/disnan.f
+++ b/lib/linalg/disnan.f
@ -1,77 +0,0 @@
-*> \brief \b DISNAN tests input for NaN.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DISNAN + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       LOGICAL FUNCTION DISNAN( DIN )
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION, INTENT(IN) :: DIN
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DISNAN returns .TRUE. if its argument is NaN, and .FALSE.
-*> otherwise.  To be replaced by the Fortran 2003 intrinsic in the
-*> future.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] DIN
-*> \verbatim
-*>          DIN is DOUBLE PRECISION
-*>          Input to test for NaN.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup OTHERauxiliary
-*
-*  =====================================================================
-      LOGICAL FUNCTION DISNAN( DIN )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION, INTENT(IN) :: DIN
-*     ..
-*
-*  =====================================================================
-*
-*  .. External Functions ..
-      LOGICAL DLAISNAN
-      EXTERNAL DLAISNAN
-*  ..
-*  .. Executable Statements ..
-      DISNAN = DLAISNAN(DIN,DIN)
-      RETURN
-      END
--- a/lib/linalg/dlabad.cpp
+++ b/lib/linalg/dlabad.cpp
@ -0,0 +1,16 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlabad_(doublereal *small, doublereal *large)
+{
+    double d_lmp_lg10(doublereal *), sqrt(doublereal);
+    if (d_lmp_lg10(large) > 2e3) {
+        *small = sqrt(*small);
+        *large = sqrt(*large);
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlabad.f
+++ b/lib/linalg/dlabad.f
@ -1,102 +0,0 @@
-*> \brief \b DLABAD
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLABAD + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLABAD( SMALL, LARGE )
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION   LARGE, SMALL
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLABAD takes as input the values computed by DLAMCH for underflow and
-*> overflow, and returns the square root of each of these values if the
-*> log of LARGE is sufficiently large.  This subroutine is intended to
-*> identify machines with a large exponent range, such as the Crays, and
-*> redefine the underflow and overflow limits to be the square roots of
-*> the values computed by DLAMCH.  This subroutine is needed because
-*> DLAMCH does not compensate for poor arithmetic in the upper half of
-*> the exponent range, as is found on a Cray.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in,out] SMALL
-*> \verbatim
-*>          SMALL is DOUBLE PRECISION
-*>          On entry, the underflow threshold as computed by DLAMCH.
-*>          On exit, if LOG10(LARGE) is sufficiently large, the square
-*>          root of SMALL, otherwise unchanged.
-*> \endverbatim
-*>
-*> \param[in,out] LARGE
-*> \verbatim
-*>          LARGE is DOUBLE PRECISION
-*>          On entry, the overflow threshold as computed by DLAMCH.
-*>          On exit, if LOG10(LARGE) is sufficiently large, the square
-*>          root of LARGE, otherwise unchanged.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup OTHERauxiliary
-*
-*  =====================================================================
-      SUBROUTINE DLABAD( SMALL, LARGE )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION   LARGE, SMALL
-*     ..
-*
-*  =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC          LOG10, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     If it looks like we're on a Cray, take the square root of
-*     SMALL and LARGE to avoid overflow and underflow problems.
-*
-      IF( LOG10( LARGE ).GT.2000.D0 ) THEN
-         SMALL = SQRT( SMALL )
-         LARGE = SQRT( LARGE )
-      END IF
-*
-      RETURN
-*
-*     End of DLABAD
-*
-      END
--- a/lib/linalg/dlabrd.cpp
+++ b/lib/linalg/dlabrd.cpp
@ -0,0 +1,210 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static doublereal c_b4 = -1.;
+static doublereal c_b5 = 1.;
+static integer c__1 = 1;
+static doublereal c_b16 = 0.;
+int dlabrd_(integer *m, integer *n, integer *nb, doublereal *a, integer *lda, doublereal *d__,
+            doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx,
+            doublereal *y, integer *ldy)
+{
+    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3;
+    integer i__;
+    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
+        dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
+               integer *, doublereal *, doublereal *, integer *, ftnlen),
+        dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+    if (*m <= 0 || *n <= 0) {
+        return 0;
+    }
+    if (*m >= *n) {
+        i__1 = *nb;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            i__2 = *m - i__ + 1;
+            i__3 = i__ - 1;
+            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1],
+                   ldy, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
+            i__2 = *m - i__ + 1;
+            i__3 = i__ - 1;
+            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1],
+                   &c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
+            i__2 = *m - i__ + 1;
+            i__3 = i__ + 1;
+            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
+                    &tauq[i__]);
+            d__[i__] = a[i__ + i__ * a_dim1];
+            if (i__ < *n) {
+                a[i__ + i__ * a_dim1] = 1.;
+                i__2 = *m - i__ + 1;
+                i__3 = *n - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda,
+                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)9);
+                i__2 = *m - i__ + 1;
+                i__3 = i__ - 1;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
+                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
+                       (ftnlen)9);
+                i__2 = *n - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
+                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__ + 1;
+                i__3 = i__ - 1;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx,
+                       &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
+                       (ftnlen)9);
+                i__2 = i__ - 1;
+                i__3 = *n - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
+                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)9);
+                i__2 = *n - i__;
+                dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+                i__2 = *n - i__;
+                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy,
+                       &a[i__ + a_dim1], lda, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)12);
+                i__2 = i__ - 1;
+                i__3 = *n - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
+                       &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)9);
+                i__2 = *n - i__;
+                i__3 = i__ + 2;
+                dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
+                        &taup[i__]);
+                e[i__] = a[i__ + (i__ + 1) * a_dim1];
+                a[i__ + (i__ + 1) * a_dim1] = 1.;
+                i__2 = *m - i__;
+                i__3 = *n - i__;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
+                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *n - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy,
+                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
+                       (ftnlen)9);
+                i__2 = *m - i__;
+                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda,
+                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = i__ - 1;
+                i__3 = *n - i__;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda,
+                       &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
+                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+            }
+        }
+    } else {
+        i__1 = *nb;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            i__2 = *n - i__ + 1;
+            i__3 = i__ - 1;
+            dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1],
+                   lda, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)12);
+            i__2 = i__ - 1;
+            i__3 = *n - i__ + 1;
+            dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1],
+                   ldx, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)9);
+            i__2 = *n - i__ + 1;
+            i__3 = i__ + 1;
+            dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
+                    &taup[i__]);
+            d__[i__] = a[i__ + i__ * a_dim1];
+            if (i__ < *m) {
+                a[i__ + i__ * a_dim1] = 1.;
+                i__2 = *m - i__;
+                i__3 = *n - i__ + 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda,
+                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *n - i__ + 1;
+                i__3 = i__ - 1;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy,
+                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1, (ftnlen)9);
+                i__2 = *m - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
+                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = i__ - 1;
+                i__3 = *n - i__ + 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda,
+                       &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
+                       &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+                i__2 = *m - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
+                       &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1, (ftnlen)12);
+                i__2 = *m - i__;
+                dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx,
+                       &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                i__3 = i__ + 2;
+                dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
+                        &tauq[i__]);
+                e[i__] = a[i__ + 1 + i__ * a_dim1];
+                a[i__ + 1 + i__ * a_dim1] = 1.;
+                i__2 = *m - i__;
+                i__3 = *n - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
+                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)9);
+                i__2 = *m - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda,
+                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
+                       (ftnlen)9);
+                i__2 = *n - i__;
+                i__3 = i__ - 1;
+                dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
+                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)12);
+                i__2 = *m - i__;
+                dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx,
+                       &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
+                       (ftnlen)9);
+                i__2 = *n - i__;
+                dgemv_((char *)"Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
+                       &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
+                       (ftnlen)9);
+                i__2 = *n - i__;
+                dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlabrd.f
+++ b/lib/linalg/dlabrd.f
@ -1,378 +0,0 @@
-*> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLABRD + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
-*                          LDY )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            LDA, LDX, LDY, M, N, NB
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-*      $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLABRD reduces the first NB rows and columns of a real general
-*> m by n matrix A to upper or lower bidiagonal form by an orthogonal
-*> transformation Q**T * A * P, and returns the matrices X and Y which
-*> are needed to apply the transformation to the unreduced part of A.
-*>
-*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
-*> bidiagonal form.
-*>
-*> This is an auxiliary routine called by DGEBRD
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows in the matrix A.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns in the matrix A.
-*> \endverbatim
-*>
-*> \param[in] NB
-*> \verbatim
-*>          NB is INTEGER
-*>          The number of leading rows and columns of A to be reduced.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          On entry, the m by n general matrix to be reduced.
-*>          On exit, the first NB rows and columns of the matrix are
-*>          overwritten; the rest of the array is unchanged.
-*>          If m >= n, elements on and below the diagonal in the first NB
-*>            columns, with the array TAUQ, represent the orthogonal
-*>            matrix Q as a product of elementary reflectors; and
-*>            elements above the diagonal in the first NB rows, with the
-*>            array TAUP, represent the orthogonal matrix P as a product
-*>            of elementary reflectors.
-*>          If m < n, elements below the diagonal in the first NB
-*>            columns, with the array TAUQ, represent the orthogonal
-*>            matrix Q as a product of elementary reflectors, and
-*>            elements on and above the diagonal in the first NB rows,
-*>            with the array TAUP, represent the orthogonal matrix P as
-*>            a product of elementary reflectors.
-*>          See Further Details.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (NB)
-*>          The diagonal elements of the first NB rows and columns of
-*>          the reduced matrix.  D(i) = A(i,i).
-*> \endverbatim
-*>
-*> \param[out] E
-*> \verbatim
-*>          E is DOUBLE PRECISION array, dimension (NB)
-*>          The off-diagonal elements of the first NB rows and columns of
-*>          the reduced matrix.
-*> \endverbatim
-*>
-*> \param[out] TAUQ
-*> \verbatim
-*>          TAUQ is DOUBLE PRECISION array, dimension (NB)
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix Q. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] TAUP
-*> \verbatim
-*>          TAUP is DOUBLE PRECISION array, dimension (NB)
-*>          The scalar factors of the elementary reflectors which
-*>          represent the orthogonal matrix P. See Further Details.
-*> \endverbatim
-*>
-*> \param[out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array, dimension (LDX,NB)
-*>          The m-by-nb matrix X required to update the unreduced part
-*>          of A.
-*> \endverbatim
-*>
-*> \param[in] LDX
-*> \verbatim
-*>          LDX is INTEGER
-*>          The leading dimension of the array X. LDX >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array, dimension (LDY,NB)
-*>          The n-by-nb matrix Y required to update the unreduced part
-*>          of A.
-*> \endverbatim
-*>
-*> \param[in] LDY
-*> \verbatim
-*>          LDY is INTEGER
-*>          The leading dimension of the array Y. LDY >= max(1,N).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleOTHERauxiliary
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  The matrices Q and P are represented as products of elementary
-*>  reflectors:
-*>
-*>     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
-*>
-*>  Each H(i) and G(i) has the form:
-*>
-*>     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T
-*>
-*>  where tauq and taup are real scalars, and v and u are real vectors.
-*>
-*>  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
-*>  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
-*>  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
-*>  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
-*>  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
-*>
-*>  The elements of the vectors v and u together form the m-by-nb matrix
-*>  V and the nb-by-n matrix U**T which are needed, with X and Y, to apply
-*>  the transformation to the unreduced part of the matrix, using a block
-*>  update of the form:  A := A - V*Y**T - X*U**T.
-*>
-*>  The contents of A on exit are illustrated by the following examples
-*>  with nb = 2:
-*>
-*>  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
-*>
-*>    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
-*>    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
-*>    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
-*>    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
-*>    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
-*>    (  v1  v2  a   a   a  )
-*>
-*>  where a denotes an element of the original matrix which is unchanged,
-*>  vi denotes an element of the vector defining H(i), and ui an element
-*>  of the vector defining G(i).
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
-     $                   LDY )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            LDA, LDX, LDY, M, N, NB
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-     $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DGEMV, DLARFG, DSCAL
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Quick return if possible
-*
-      IF( M.LE.0 .OR. N.LE.0 )
-     $   RETURN
-*
-      IF( M.GE.N ) THEN
-*
-*        Reduce to upper bidiagonal form
-*
-         DO 10 I = 1, NB
-*
-*           Update A(i:m,i)
-*
-            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
-     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
-            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
-     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
-*
-*           Generate reflection Q(i) to annihilate A(i+1:m,i)
-*
-            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
-     $                   TAUQ( I ) )
-            D( I ) = A( I, I )
-            IF( I.LT.N ) THEN
-               A( I, I ) = ONE
-*
-*              Compute Y(i+1:n,i)
-*
-               CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
-     $                     LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
-     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
-     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
-     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
-               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
-     $                     LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
-               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
-*
-*              Update A(i,i+1:n)
-*
-               CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
-     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
-               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
-     $                     LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
-*
-*              Generate reflection P(i) to annihilate A(i,i+2:n)
-*
-               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
-     $                      LDA, TAUP( I ) )
-               E( I ) = A( I, I+1 )
-               A( I, I+1 ) = ONE
-*
-*              Compute X(i+1:m,i)
-*
-               CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
-     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
-     $                     A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
-     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
-               CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
-     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
-     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
-               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
-            END IF
-   10    CONTINUE
-      ELSE
-*
-*        Reduce to lower bidiagonal form
-*
-         DO 20 I = 1, NB
-*
-*           Update A(i,i:n)
-*
-            CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
-     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
-            CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
-     $                  X( I, 1 ), LDX, ONE, A( I, I ), LDA )
-*
-*           Generate reflection P(i) to annihilate A(i,i+1:n)
-*
-            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
-     $                   TAUP( I ) )
-            D( I ) = A( I, I )
-            IF( I.LT.M ) THEN
-               A( I, I ) = ONE
-*
-*              Compute X(i+1:m,i)
-*
-               CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
-     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
-     $                     A( I, I ), LDA, ZERO, X( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
-     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
-               CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
-     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
-     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
-               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
-*
-*              Update A(i+1:m,i)
-*
-               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
-     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
-               CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
-     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
-*
-*              Generate reflection Q(i) to annihilate A(i+2:m,i)
-*
-               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
-     $                      TAUQ( I ) )
-               E( I ) = A( I+1, I )
-               A( I+1, I ) = ONE
-*
-*              Compute Y(i+1:n,i)
-*
-               CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
-     $                     LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
-     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
-               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
-     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
-               CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
-     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
-               CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
-     $                     Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
-               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
-            END IF
-   20    CONTINUE
-      END IF
-      RETURN
-*
-*     End of DLABRD
-*
-      END
--- a/lib/linalg/dlacn2.cpp
+++ b/lib/linalg/dlacn2.cpp
@ -0,0 +1,136 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+int dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase,
+            integer *isave)
+{
+    integer i__1;
+    doublereal d__1;
+    integer i_lmp_dnnt(doublereal *);
+    integer i__;
+    doublereal xs, temp;
+    extern doublereal dasum_(integer *, doublereal *, integer *);
+    integer jlast;
+    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    doublereal altsgn, estold;
+    --isave;
+    --isgn;
+    --x;
+    --v;
+    if (*kase == 0) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            x[i__] = 1. / (doublereal)(*n);
+        }
+        *kase = 1;
+        isave[1] = 1;
+        return 0;
+    }
+    switch (isave[1]) {
+        case 1:
+            goto L20;
+        case 2:
+            goto L40;
+        case 3:
+            goto L70;
+        case 4:
+            goto L110;
+        case 5:
+            goto L140;
+    }
+L20:
+    if (*n == 1) {
+        v[1] = x[1];
+        *est = abs(v[1]);
+        goto L150;
+    }
+    *est = dasum_(n, &x[1], &c__1);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        if (x[i__] >= 0.) {
+            x[i__] = 1.;
+        } else {
+            x[i__] = -1.;
+        }
+        isgn[i__] = i_lmp_dnnt(&x[i__]);
+    }
+    *kase = 2;
+    isave[1] = 2;
+    return 0;
+L40:
+    isave[2] = idamax_(n, &x[1], &c__1);
+    isave[3] = 2;
+L50:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        x[i__] = 0.;
+    }
+    x[isave[2]] = 1.;
+    *kase = 1;
+    isave[1] = 3;
+    return 0;
+L70:
+    dcopy_(n, &x[1], &c__1, &v[1], &c__1);
+    estold = *est;
+    *est = dasum_(n, &v[1], &c__1);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        if (x[i__] >= 0.) {
+            xs = 1.;
+        } else {
+            xs = -1.;
+        }
+        if (i_lmp_dnnt(&xs) != isgn[i__]) {
+            goto L90;
+        }
+    }
+    goto L120;
+L90:
+    if (*est <= estold) {
+        goto L120;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        if (x[i__] >= 0.) {
+            x[i__] = 1.;
+        } else {
+            x[i__] = -1.;
+        }
+        isgn[i__] = i_lmp_dnnt(&x[i__]);
+    }
+    *kase = 2;
+    isave[1] = 4;
+    return 0;
+L110:
+    jlast = isave[2];
+    isave[2] = idamax_(n, &x[1], &c__1);
+    if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) {
+        ++isave[3];
+        goto L50;
+    }
+L120:
+    altsgn = 1.;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        x[i__] = altsgn * ((doublereal)(i__ - 1) / (doublereal)(*n - 1) + 1.);
+        altsgn = -altsgn;
+    }
+    *kase = 1;
+    isave[1] = 5;
+    return 0;
+L140:
+    temp = dasum_(n, &x[1], &c__1) / (doublereal)(*n * 3) * 2.;
+    if (temp > *est) {
+        dcopy_(n, &x[1], &c__1, &v[1], &c__1);
+        *est = temp;
+    }
+L150:
+    *kase = 0;
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlacn2.f
+++ b/lib/linalg/dlacn2.f
@ -1,304 +0,0 @@
-*> \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLACN2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            KASE, N
-*       DOUBLE PRECISION   EST
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            ISGN( * ), ISAVE( 3 )
-*       DOUBLE PRECISION   V( * ), X( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLACN2 estimates the 1-norm of a square, real matrix A.
-*> Reverse communication is used for evaluating matrix-vector products.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         The order of the matrix.  N >= 1.
-*> \endverbatim
-*>
-*> \param[out] V
-*> \verbatim
-*>          V is DOUBLE PRECISION array, dimension (N)
-*>         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
-*>         (W is not returned).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array, dimension (N)
-*>         On an intermediate return, X should be overwritten by
-*>               A * X,   if KASE=1,
-*>               A**T * X,  if KASE=2,
-*>         and DLACN2 must be re-called with all the other parameters
-*>         unchanged.
-*> \endverbatim
-*>
-*> \param[out] ISGN
-*> \verbatim
-*>          ISGN is INTEGER array, dimension (N)
-*> \endverbatim
-*>
-*> \param[in,out] EST
-*> \verbatim
-*>          EST is DOUBLE PRECISION
-*>         On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be
-*>         unchanged from the previous call to DLACN2.
-*>         On exit, EST is an estimate (a lower bound) for norm(A).
-*> \endverbatim
-*>
-*> \param[in,out] KASE
-*> \verbatim
-*>          KASE is INTEGER
-*>         On the initial call to DLACN2, KASE should be 0.
-*>         On an intermediate return, KASE will be 1 or 2, indicating
-*>         whether X should be overwritten by A * X  or A**T * X.
-*>         On the final return from DLACN2, KASE will again be 0.
-*> \endverbatim
-*>
-*> \param[in,out] ISAVE
-*> \verbatim
-*>          ISAVE is INTEGER array, dimension (3)
-*>         ISAVE is used to save variables between calls to DLACN2
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleOTHERauxiliary
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Originally named SONEST, dated March 16, 1988.
-*>
-*>  This is a thread safe version of DLACON, which uses the array ISAVE
-*>  in place of a SAVE statement, as follows:
-*>
-*>     DLACON     DLACN2
-*>      JUMP     ISAVE(1)
-*>      J        ISAVE(2)
-*>      ITER     ISAVE(3)
-*> \endverbatim
-*
-*> \par Contributors:
-*  ==================
-*>
-*>     Nick Higham, University of Manchester
-*
-*> \par References:
-*  ================
-*>
-*>  N.J. Higham, "FORTRAN codes for estimating the one-norm of
-*>  a real or complex matrix, with applications to condition estimation",
-*>  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
-*>
-*  =====================================================================
-      SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            KASE, N
-      DOUBLE PRECISION   EST
-*     ..
-*     .. Array Arguments ..
-      INTEGER            ISGN( * ), ISAVE( 3 )
-      DOUBLE PRECISION   V( * ), X( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      INTEGER            ITMAX
-      PARAMETER          ( ITMAX = 5 )
-      DOUBLE PRECISION   ZERO, ONE, TWO
-      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, JLAST
-      DOUBLE PRECISION   ALTSGN, ESTOLD, TEMP, XS
-*     ..
-*     .. External Functions ..
-      INTEGER            IDAMAX
-      DOUBLE PRECISION   DASUM
-      EXTERNAL           IDAMAX, DASUM
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DCOPY
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, DBLE, NINT
-*     ..
-*     .. Executable Statements ..
-*
-      IF( KASE.EQ.0 ) THEN
-         DO 10 I = 1, N
-            X( I ) = ONE / DBLE( N )
-   10    CONTINUE
-         KASE = 1
-         ISAVE( 1 ) = 1
-         RETURN
-      END IF
-*
-      GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 )
-*
-*     ................ ENTRY   (ISAVE( 1 ) = 1)
-*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
-*
-   20 CONTINUE
-      IF( N.EQ.1 ) THEN
-         V( 1 ) = X( 1 )
-         EST = ABS( V( 1 ) )
-*        ... QUIT
-         GO TO 150
-      END IF
-      EST = DASUM( N, X, 1 )
-*
-      DO 30 I = 1, N
-         IF( X(I).GE.ZERO ) THEN
-            X(I) = ONE
-         ELSE
-            X(I) = -ONE
-         END IF
-         ISGN( I ) = NINT( X( I ) )
-   30 CONTINUE
-      KASE = 2
-      ISAVE( 1 ) = 2
-      RETURN
-*
-*     ................ ENTRY   (ISAVE( 1 ) = 2)
-*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
-*
-   40 CONTINUE
-      ISAVE( 2 ) = IDAMAX( N, X, 1 )
-      ISAVE( 3 ) = 2
-*
-*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
-*
-   50 CONTINUE
-      DO 60 I = 1, N
-         X( I ) = ZERO
-   60 CONTINUE
-      X( ISAVE( 2 ) ) = ONE
-      KASE = 1
-      ISAVE( 1 ) = 3
-      RETURN
-*
-*     ................ ENTRY   (ISAVE( 1 ) = 3)
-*     X HAS BEEN OVERWRITTEN BY A*X.
-*
-   70 CONTINUE
-      CALL DCOPY( N, X, 1, V, 1 )
-      ESTOLD = EST
-      EST = DASUM( N, V, 1 )
-      DO 80 I = 1, N
-         IF( X(I).GE.ZERO ) THEN
-            XS = ONE
-         ELSE
-            XS = -ONE
-         END IF
-         IF( NINT( XS ).NE.ISGN( I ) )
-     $      GO TO 90
-   80 CONTINUE
-*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
-      GO TO 120
-*
-   90 CONTINUE
-*     TEST FOR CYCLING.
-      IF( EST.LE.ESTOLD )
-     $   GO TO 120
-*
-      DO 100 I = 1, N
-         IF( X(I).GE.ZERO ) THEN
-            X(I) = ONE
-         ELSE
-            X(I) = -ONE
-         END IF
-         ISGN( I ) = NINT( X( I ) )
-  100 CONTINUE
-      KASE = 2
-      ISAVE( 1 ) = 4
-      RETURN
-*
-*     ................ ENTRY   (ISAVE( 1 ) = 4)
-*     X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
-*
-  110 CONTINUE
-      JLAST = ISAVE( 2 )
-      ISAVE( 2 ) = IDAMAX( N, X, 1 )
-      IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND.
-     $    ( ISAVE( 3 ).LT.ITMAX ) ) THEN
-         ISAVE( 3 ) = ISAVE( 3 ) + 1
-         GO TO 50
-      END IF
-*
-*     ITERATION COMPLETE.  FINAL STAGE.
-*
-  120 CONTINUE
-      ALTSGN = ONE
-      DO 130 I = 1, N
-         X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) )
-         ALTSGN = -ALTSGN
-  130 CONTINUE
-      KASE = 1
-      ISAVE( 1 ) = 5
-      RETURN
-*
-*     ................ ENTRY   (ISAVE( 1 ) = 5)
-*     X HAS BEEN OVERWRITTEN BY A*X.
-*
-  140 CONTINUE
-      TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) )
-      IF( TEMP.GT.EST ) THEN
-         CALL DCOPY( N, X, 1, V, 1 )
-         EST = TEMP
-      END IF
-*
-  150 CONTINUE
-      KASE = 0
-      RETURN
-*
-*     End of DLACN2
-*
-      END
--- a/lib/linalg/dlacpy.cpp
+++ b/lib/linalg/dlacpy.cpp
@ -0,0 +1,46 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlacpy_(char *uplo, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b,
+            integer *ldb, ftnlen uplo_len)
+{
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+    integer i__, j;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            i__2 = min(j, *m);
+            for (i__ = 1; i__ <= i__2; ++i__) {
+                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+            }
+        }
+    } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            i__2 = *m;
+            for (i__ = j; i__ <= i__2; ++i__) {
+                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+            }
+        }
+    } else {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            i__2 = *m;
+            for (i__ = 1; i__ <= i__2; ++i__) {
+                b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+            }
+        }
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlacpy.f
+++ b/lib/linalg/dlacpy.f
@ -1,153 +0,0 @@
-*> \brief \b DLACPY copies all or part of one two-dimensional array to another.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLACPY + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
-*
-*       .. Scalar Arguments ..
-*       CHARACTER          UPLO
-*       INTEGER            LDA, LDB, M, N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLACPY copies all or part of a two-dimensional matrix A to another
-*> matrix B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies the part of the matrix A to be copied to B.
-*>          = 'U':      Upper triangular part
-*>          = 'L':      Lower triangular part
-*>          Otherwise:  All of the matrix A
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array, dimension (LDA,N)
-*>          The m by n matrix A.  If UPLO = 'U', only the upper triangle
-*>          or trapezoid is accessed; if UPLO = 'L', only the lower
-*>          triangle or trapezoid is accessed.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
-*> \endverbatim
-*>
-*> \param[out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array, dimension (LDB,N)
-*>          On exit, B = A in the locations specified by UPLO.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>          The leading dimension of the array B.  LDB >= max(1,M).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup OTHERauxiliary
-*
-*  =====================================================================
-      SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      CHARACTER          UPLO
-      INTEGER            LDA, LDB, M, N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER            I, J
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME
-      EXTERNAL           LSAME
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MIN
-*     ..
-*     .. Executable Statements ..
-*
-      IF( LSAME( UPLO, 'U' ) ) THEN
-         DO 20 J = 1, N
-            DO 10 I = 1, MIN( J, M )
-               B( I, J ) = A( I, J )
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
-         DO 40 J = 1, N
-            DO 30 I = J, M
-               B( I, J ) = A( I, J )
-   30       CONTINUE
-   40    CONTINUE
-      ELSE
-         DO 60 J = 1, N
-            DO 50 I = 1, M
-               B( I, J ) = A( I, J )
-   50       CONTINUE
-   60    CONTINUE
-      END IF
-      RETURN
-*
-*     End of DLACPY
-*
-      END
--- a/lib/linalg/dladiv.cpp
+++ b/lib/linalg/dladiv.cpp
@ -0,0 +1,88 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
+            doublereal *q)
+{
+    doublereal d__1, d__2;
+    doublereal s, aa, ab, bb, cc, cd, dd, be, un, ov, eps;
+    extern doublereal dlamch_(char *, ftnlen);
+    extern int dladiv1_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
+                        doublereal *);
+    aa = *a;
+    bb = *b;
+    cc = *c__;
+    dd = *d__;
+    d__1 = abs(*a), d__2 = abs(*b);
+    ab = max(d__1, d__2);
+    d__1 = abs(*c__), d__2 = abs(*d__);
+    cd = max(d__1, d__2);
+    s = 1.;
+    ov = dlamch_((char *)"Overflow threshold", (ftnlen)18);
+    un = dlamch_((char *)"Safe minimum", (ftnlen)12);
+    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
+    be = 2. / (eps * eps);
+    if (ab >= ov * .5) {
+        aa *= .5;
+        bb *= .5;
+        s *= 2.;
+    }
+    if (cd >= ov * .5) {
+        cc *= .5;
+        dd *= .5;
+        s *= .5;
+    }
+    if (ab <= un * 2. / eps) {
+        aa *= be;
+        bb *= be;
+        s /= be;
+    }
+    if (cd <= un * 2. / eps) {
+        cc *= be;
+        dd *= be;
+        s *= be;
+    }
+    if (abs(*d__) <= abs(*c__)) {
+        dladiv1_(&aa, &bb, &cc, &dd, p, q);
+    } else {
+        dladiv1_(&bb, &aa, &dd, &cc, p, q);
+        *q = -(*q);
+    }
+    *p *= s;
+    *q *= s;
+    return 0;
+}
+int dladiv1_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
+             doublereal *q)
+{
+    doublereal r__, t;
+    extern doublereal dladiv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
+                               doublereal *);
+    r__ = *d__ / *c__;
+    t = 1. / (*c__ + *d__ * r__);
+    *p = dladiv2_(a, b, c__, d__, &r__, &t);
+    *a = -(*a);
+    *q = dladiv2_(b, a, c__, d__, &r__, &t);
+    return 0;
+}
+doublereal dladiv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *r__,
+                    doublereal *t)
+{
+    doublereal ret_val;
+    doublereal br;
+    if (*r__ != 0.) {
+        br = *b * *r__;
+        if (br != 0.) {
+            ret_val = (*a + br) * *t;
+        } else {
+            ret_val = *a * *t + *b * *t * *r__;
+        }
+    } else {
+        ret_val = (*a + *d__ * (*b / *c__)) * *t;
+    }
+    return ret_val;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dladiv.f
+++ b/lib/linalg/dladiv.f
@ -1,251 +0,0 @@
-*> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLADIV + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLADIV( A, B, C, D, P, Q )
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION   A, B, C, D, P, Q
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLADIV performs complex division in  real arithmetic
-*>
-*>                       a + i*b
-*>            p + i*q = ---------
-*>                       c + i*d
-*>
-*> The algorithm is due to Michael Baudin and Robert L. Smith
-*> and can be found in the paper
-*> "A Robust Complex Division in Scilab"
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in] C
-*> \verbatim
-*>          C is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is DOUBLE PRECISION
-*>          The scalars a, b, c, and d in the above expression.
-*> \endverbatim
-*>
-*> \param[out] P
-*> \verbatim
-*>          P is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[out] Q
-*> \verbatim
-*>          Q is DOUBLE PRECISION
-*>          The scalars p and q in the above expression.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup doubleOTHERauxiliary
-*
-*  =====================================================================
-      SUBROUTINE DLADIV( A, B, C, D, P, Q )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION   A, B, C, D, P, Q
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   BS
-      PARAMETER          ( BS = 2.0D0 )
-      DOUBLE PRECISION   HALF
-      PARAMETER          ( HALF = 0.5D0 )
-      DOUBLE PRECISION   TWO
-      PARAMETER          ( TWO = 2.0D0 )
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION   AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH
-      EXTERNAL           DLAMCH
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLADIV1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX
-*     ..
-*     .. Executable Statements ..
-*
-      AA = A
-      BB = B
-      CC = C
-      DD = D
-      AB = MAX( ABS(A), ABS(B) )
-      CD = MAX( ABS(C), ABS(D) )
-      S = 1.0D0
-
-      OV = DLAMCH( 'Overflow threshold' )
-      UN = DLAMCH( 'Safe minimum' )
-      EPS = DLAMCH( 'Epsilon' )
-      BE = BS / (EPS*EPS)
-
-      IF( AB >= HALF*OV ) THEN
-         AA = HALF * AA
-         BB = HALF * BB
-         S  = TWO * S
-      END IF
-      IF( CD >= HALF*OV ) THEN
-         CC = HALF * CC
-         DD = HALF * DD
-         S  = HALF * S
-      END IF
-      IF( AB <= UN*BS/EPS ) THEN
-         AA = AA * BE
-         BB = BB * BE
-         S  = S / BE
-      END IF
-      IF( CD <= UN*BS/EPS ) THEN
-         CC = CC * BE
-         DD = DD * BE
-         S  = S * BE
-      END IF
-      IF( ABS( D ).LE.ABS( C ) ) THEN
-         CALL DLADIV1(AA, BB, CC, DD, P, Q)
-      ELSE
-         CALL DLADIV1(BB, AA, DD, CC, P, Q)
-         Q = -Q
-      END IF
-      P = P * S
-      Q = Q * S
-*
-      RETURN
-*
-*     End of DLADIV
-*
-      END
-
-*> \ingroup doubleOTHERauxiliary
-
-
-      SUBROUTINE DLADIV1( A, B, C, D, P, Q )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION   A, B, C, D, P, Q
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D0 )
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION   R, T
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLADIV2
-      EXTERNAL           DLADIV2
-*     ..
-*     .. Executable Statements ..
-*
-      R = D / C
-      T = ONE / (C + D * R)
-      P = DLADIV2(A, B, C, D, R, T)
-      A = -A
-      Q = DLADIV2(B, A, C, D, R, T)
-*
-      RETURN
-*
-*     End of DLADIV1
-*
-      END
-
-*> \ingroup doubleOTHERauxiliary
-
-      DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION   A, B, C, D, R, T
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO
-      PARAMETER          ( ZERO = 0.0D0 )
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION   BR
-*     ..
-*     .. Executable Statements ..
-*
-      IF( R.NE.ZERO ) THEN
-         BR = B * R
-         IF( BR.NE.ZERO ) THEN
-            DLADIV2 = (A + BR) * T
-         ELSE
-            DLADIV2 = A * T + (B * T) * R
-         END IF
-      ELSE
-         DLADIV2 = (A + D * (B / C)) * T
-      END IF
-*
-      RETURN
-*
-*     End of DLADIV2
-*
-      END
--- a/lib/linalg/dlae2.cpp
+++ b/lib/linalg/dlae2.cpp
@ -0,0 +1,45 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2)
+{
+    doublereal d__1;
+    double sqrt(doublereal);
+    doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
+    sm = *a + *c__;
+    df = *a - *c__;
+    adf = abs(df);
+    tb = *b + *b;
+    ab = abs(tb);
+    if (abs(*a) > abs(*c__)) {
+        acmx = *a;
+        acmn = *c__;
+    } else {
+        acmx = *c__;
+        acmn = *a;
+    }
+    if (adf > ab) {
+        d__1 = ab / adf;
+        rt = adf * sqrt(d__1 * d__1 + 1.);
+    } else if (adf < ab) {
+        d__1 = adf / ab;
+        rt = ab * sqrt(d__1 * d__1 + 1.);
+    } else {
+        rt = ab * sqrt(2.);
+    }
+    if (sm < 0.) {
+        *rt1 = (sm - rt) * .5;
+        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else if (sm > 0.) {
+        *rt1 = (sm + rt) * .5;
+        *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else {
+        *rt1 = rt * .5;
+        *rt2 = rt * -.5;
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlae2.f
+++ b/lib/linalg/dlae2.f
@ -1,182 +0,0 @@
-*> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAE2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION   A, B, C, RT1, RT2
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
-*>    [  A   B  ]
-*>    [  B   C  ].
-*> On return, RT1 is the eigenvalue of larger absolute value, and RT2
-*> is the eigenvalue of smaller absolute value.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION
-*>          The (1,1) element of the 2-by-2 matrix.
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION
-*>          The (1,2) and (2,1) elements of the 2-by-2 matrix.
-*> \endverbatim
-*>
-*> \param[in] C
-*> \verbatim
-*>          C is DOUBLE PRECISION
-*>          The (2,2) element of the 2-by-2 matrix.
-*> \endverbatim
-*>
-*> \param[out] RT1
-*> \verbatim
-*>          RT1 is DOUBLE PRECISION
-*>          The eigenvalue of larger absolute value.
-*> \endverbatim
-*>
-*> \param[out] RT2
-*> \verbatim
-*>          RT2 is DOUBLE PRECISION
-*>          The eigenvalue of smaller absolute value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup OTHERauxiliary
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  RT1 is accurate to a few ulps barring over/underflow.
-*>
-*>  RT2 may be inaccurate if there is massive cancellation in the
-*>  determinant A*C-B*B; higher precision or correctly rounded or
-*>  correctly truncated arithmetic would be needed to compute RT2
-*>  accurately in all cases.
-*>
-*>  Overflow is possible only if RT1 is within a factor of 5 of overflow.
-*>  Underflow is harmless if the input data is 0 or exceeds
-*>     underflow_threshold / macheps.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
-*
-*  -- LAPACK auxiliary routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION   A, B, C, RT1, RT2
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE
-      PARAMETER          ( ONE = 1.0D0 )
-      DOUBLE PRECISION   TWO
-      PARAMETER          ( TWO = 2.0D0 )
-      DOUBLE PRECISION   ZERO
-      PARAMETER          ( ZERO = 0.0D0 )
-      DOUBLE PRECISION   HALF
-      PARAMETER          ( HALF = 0.5D0 )
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION   AB, ACMN, ACMX, ADF, DF, RT, SM, TB
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     Compute the eigenvalues
-*
-      SM = A + C
-      DF = A - C
-      ADF = ABS( DF )
-      TB = B + B
-      AB = ABS( TB )
-      IF( ABS( A ).GT.ABS( C ) ) THEN
-         ACMX = A
-         ACMN = C
-      ELSE
-         ACMX = C
-         ACMN = A
-      END IF
-      IF( ADF.GT.AB ) THEN
-         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
-      ELSE IF( ADF.LT.AB ) THEN
-         RT = AB*SQRT( ONE+( ADF / AB )**2 )
-      ELSE
-*
-*        Includes case AB=ADF=0
-*
-         RT = AB*SQRT( TWO )
-      END IF
-      IF( SM.LT.ZERO ) THEN
-         RT1 = HALF*( SM-RT )
-*
-*        Order of execution important.
-*        To get fully accurate smaller eigenvalue,
-*        next line needs to be executed in higher precision.
-*
-         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
-      ELSE IF( SM.GT.ZERO ) THEN
-         RT1 = HALF*( SM+RT )
-*
-*        Order of execution important.
-*        To get fully accurate smaller eigenvalue,
-*        next line needs to be executed in higher precision.
-*
-         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
-      ELSE
-*
-*        Includes case RT1 = RT2 = 0
-*
-         RT1 = HALF*RT
-         RT2 = -HALF*RT
-      END IF
-      RETURN
-*
-*     End of DLAE2
-*
-      END
--- a/lib/linalg/dlaed0.cpp
+++ b/lib/linalg/dlaed0.cpp
@ -0,0 +1,236 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__9 = 9;
+static integer c__0 = 0;
+static integer c__2 = 2;
+static doublereal c_b23 = 1.;
+static doublereal c_b24 = 0.;
+static integer c__1 = 1;
+int dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e,
+            doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work,
+            integer *iwork, integer *info)
+{
+    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
+    doublereal d__1;
+    double log(doublereal);
+    integer pow_lmp_ii(integer *, integer *);
+    integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
+    doublereal temp;
+    integer curr;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen);
+    integer iperm;
+    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
+    integer indxq, iwrem;
+    extern int dlaed1_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *,
+                       integer *, doublereal *, integer *, integer *);
+    integer iqptr;
+    extern int dlaed7_(integer *, integer *, integer *, integer *, integer *, integer *,
+                       doublereal *, doublereal *, integer *, integer *, doublereal *, integer *,
+                       doublereal *, integer *, integer *, integer *, integer *, integer *,
+                       doublereal *, doublereal *, integer *, integer *);
+    integer tlvls;
+    extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                       integer *, ftnlen);
+    integer igivcl;
+    extern int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
+                           ftnlen, ftnlen);
+    integer igivnm, submat, curprb, subpbs, igivpt;
+    extern int dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *,
+                       doublereal *, integer *, ftnlen);
+    integer curlvl, matsiz, iprmpt, smlsiz;
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    qstore_dim1 = *ldqs;
+    qstore_offset = 1 + qstore_dim1;
+    qstore -= qstore_offset;
+    --work;
+    --iwork;
+    *info = 0;
+    if (*icompq < 0 || *icompq > 2) {
+        *info = -1;
+    } else if (*icompq == 1 && *qsiz < max(0, *n)) {
+        *info = -2;
+    } else if (*n < 0) {
+        *info = -3;
+    } else if (*ldq < max(1, *n)) {
+        *info = -7;
+    } else if (*ldqs < max(1, *n)) {
+        *info = -9;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DLAED0", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    smlsiz = ilaenv_(&c__9, (char *)"DLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
+    iwork[1] = *n;
+    subpbs = 1;
+    tlvls = 0;
+L10:
+    if (iwork[subpbs] > smlsiz) {
+        for (j = subpbs; j >= 1; --j) {
+            iwork[j * 2] = (iwork[j] + 1) / 2;
+            iwork[(j << 1) - 1] = iwork[j] / 2;
+        }
+        ++tlvls;
+        subpbs <<= 1;
+        goto L10;
+    }
+    i__1 = subpbs;
+    for (j = 2; j <= i__1; ++j) {
+        iwork[j] += iwork[j - 1];
+    }
+    spm1 = subpbs - 1;
+    i__1 = spm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        submat = iwork[i__] + 1;
+        smm1 = submat - 1;
+        d__[smm1] -= (d__1 = e[smm1], abs(d__1));
+        d__[submat] -= (d__1 = e[smm1], abs(d__1));
+    }
+    indxq = (*n << 2) + 3;
+    if (*icompq != 2) {
+        temp = log((doublereal)(*n)) / log(2.);
+        lgn = (integer)temp;
+        if (pow_lmp_ii(&c__2, &lgn) < *n) {
+            ++lgn;
+        }
+        if (pow_lmp_ii(&c__2, &lgn) < *n) {
+            ++lgn;
+        }
+        iprmpt = indxq + *n + 1;
+        iperm = iprmpt + *n * lgn;
+        iqptr = iperm + *n * lgn;
+        igivpt = iqptr + *n + 2;
+        igivcl = igivpt + *n * lgn;
+        igivnm = 1;
+        iq = igivnm + (*n << 1) * lgn;
+        i__1 = *n;
+        iwrem = iq + i__1 * i__1 + 1;
+        i__1 = subpbs;
+        for (i__ = 0; i__ <= i__1; ++i__) {
+            iwork[iprmpt + i__] = 1;
+            iwork[igivpt + i__] = 1;
+        }
+        iwork[iqptr] = 1;
+    }
+    curr = 0;
+    i__1 = spm1;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+        if (i__ == 0) {
+            submat = 1;
+            matsiz = iwork[1];
+        } else {
+            submat = iwork[i__] + 1;
+            matsiz = iwork[i__ + 1] - iwork[i__];
+        }
+        if (*icompq == 2) {
+            dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq,
+                    &work[1], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L130;
+            }
+        } else {
+            dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]],
+                    &matsiz, &work[1], info, (ftnlen)1);
+            if (*info != 0) {
+                goto L130;
+            }
+            if (*icompq == 1) {
+                dgemm_((char *)"N", (char *)"N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * q_dim1 + 1], ldq,
+                       &work[iq - 1 + iwork[iqptr + curr]], &matsiz, &c_b24,
+                       &qstore[submat * qstore_dim1 + 1], ldqs, (ftnlen)1, (ftnlen)1);
+            }
+            i__2 = matsiz;
+            iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
+            ++curr;
+        }
+        k = 1;
+        i__2 = iwork[i__ + 1];
+        for (j = submat; j <= i__2; ++j) {
+            iwork[indxq + j] = k;
+            ++k;
+        }
+    }
+    curlvl = 1;
+L80:
+    if (subpbs > 1) {
+        spm2 = subpbs - 2;
+        i__1 = spm2;
+        for (i__ = 0; i__ <= i__1; i__ += 2) {
+            if (i__ == 0) {
+                submat = 1;
+                matsiz = iwork[2];
+                msd2 = iwork[1];
+                curprb = 0;
+            } else {
+                submat = iwork[i__] + 1;
+                matsiz = iwork[i__ + 2] - iwork[i__];
+                msd2 = matsiz / 2;
+                ++curprb;
+            }
+            if (*icompq == 2) {
+                dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq,
+                        &iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &work[1],
+                        &iwork[subpbs + 1], info);
+            } else {
+                dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[submat],
+                        &qstore[submat * qstore_dim1 + 1], ldqs, &iwork[indxq + submat],
+                        &e[submat + msd2 - 1], &msd2, &work[iq], &iwork[iqptr], &iwork[iprmpt],
+                        &iwork[iperm], &iwork[igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem],
+                        &iwork[subpbs + 1], info);
+            }
+            if (*info != 0) {
+                goto L130;
+            }
+            iwork[i__ / 2 + 1] = iwork[i__ + 2];
+        }
+        subpbs /= 2;
+        ++curlvl;
+        goto L80;
+    }
+    if (*icompq == 1) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            j = iwork[indxq + i__];
+            work[i__] = d__[j];
+            dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1);
+        }
+        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    } else if (*icompq == 2) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            j = iwork[indxq + i__];
+            work[i__] = d__[j];
+            dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
+        }
+        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+        dlacpy_((char *)"A", n, n, &work[*n + 1], n, &q[q_offset], ldq, (ftnlen)1);
+    } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            j = iwork[indxq + i__];
+            work[i__] = d__[j];
+        }
+        dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    }
+    goto L140;
+L130:
+    *info = submat * (*n + 1) + submat + matsiz - 1;
+L140:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed0.f
+++ b/lib/linalg/dlaed0.f
@ -1,431 +0,0 @@
-*> \brief \b DLAED0 used by DSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED0 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
-*                          WORK, IWORK, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            IWORK( * )
-*       DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
-*      $                   WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAED0 computes all eigenvalues and corresponding eigenvectors of a
-*> symmetric tridiagonal matrix using the divide and conquer method.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] ICOMPQ
-*> \verbatim
-*>          ICOMPQ is INTEGER
-*>          = 0:  Compute eigenvalues only.
-*>          = 1:  Compute eigenvectors of original dense symmetric matrix
-*>                also.  On entry, Q contains the orthogonal matrix used
-*>                to reduce the original matrix to tridiagonal form.
-*>          = 2:  Compute eigenvalues and eigenvectors of tridiagonal
-*>                matrix.
-*> \endverbatim
-*>
-*> \param[in] QSIZ
-*> \verbatim
-*>          QSIZ is INTEGER
-*>         The dimension of the orthogonal matrix used to reduce
-*>         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>         On entry, the main diagonal of the tridiagonal matrix.
-*>         On exit, its eigenvalues.
-*> \endverbatim
-*>
-*> \param[in] E
-*> \verbatim
-*>          E is DOUBLE PRECISION array, dimension (N-1)
-*>         The off-diagonal elements of the tridiagonal matrix.
-*>         On exit, E has been destroyed.
-*> \endverbatim
-*>
-*> \param[in,out] Q
-*> \verbatim
-*>          Q is DOUBLE PRECISION array, dimension (LDQ, N)
-*>         On entry, Q must contain an N-by-N orthogonal matrix.
-*>         If ICOMPQ = 0    Q is not referenced.
-*>         If ICOMPQ = 1    On entry, Q is a subset of the columns of the
-*>                          orthogonal matrix used to reduce the full
-*>                          matrix to tridiagonal form corresponding to
-*>                          the subset of the full matrix which is being
-*>                          decomposed at this time.
-*>         If ICOMPQ = 2    On entry, Q will be the identity matrix.
-*>                          On exit, Q contains the eigenvectors of the
-*>                          tridiagonal matrix.
-*> \endverbatim
-*>
-*> \param[in] LDQ
-*> \verbatim
-*>          LDQ is INTEGER
-*>         The leading dimension of the array Q.  If eigenvectors are
-*>         desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
-*> \endverbatim
-*>
-*> \param[out] QSTORE
-*> \verbatim
-*>          QSTORE is DOUBLE PRECISION array, dimension (LDQS, N)
-*>         Referenced only when ICOMPQ = 1.  Used to store parts of
-*>         the eigenvector matrix when the updating matrix multiplies
-*>         take place.
-*> \endverbatim
-*>
-*> \param[in] LDQS
-*> \verbatim
-*>          LDQS is INTEGER
-*>         The leading dimension of the array QSTORE.  If ICOMPQ = 1,
-*>         then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array,
-*>         If ICOMPQ = 0 or 1, the dimension of WORK must be at least
-*>                     1 + 3*N + 2*N*lg N + 3*N**2
-*>                     ( lg( N ) = smallest integer k
-*>                                 such that 2^k >= N )
-*>         If ICOMPQ = 2, the dimension of WORK must be at least
-*>                     4*N + N**2.
-*> \endverbatim
-*>
-*> \param[out] IWORK
-*> \verbatim
-*>          IWORK is INTEGER array,
-*>         If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
-*>                        6 + 6*N + 5*N*lg N.
-*>                        ( lg( N ) = smallest integer k
-*>                                    such that 2^k >= N )
-*>         If ICOMPQ = 2, the dimension of IWORK must be at least
-*>                        3 + 5*N.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit.
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*>          > 0:  The algorithm failed to compute an eigenvalue while
-*>                working on the submatrix lying in rows and columns
-*>                INFO/(N+1) through mod(INFO,N+1).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*> Jeff Rutter, Computer Science Division, University of California
-*> at Berkeley, USA
-*
-*  =====================================================================
-      SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
-     $                   WORK, IWORK, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
-*     ..
-*     .. Array Arguments ..
-      INTEGER            IWORK( * )
-      DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
-     $                   WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE, TWO
-      PARAMETER          ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
-     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
-     $                   J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
-     $                   SPM2, SUBMAT, SUBPBS, TLVLS
-      DOUBLE PRECISION   TEMP
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR,
-     $                   XERBLA
-*     ..
-*     .. External Functions ..
-      INTEGER            ILAENV
-      EXTERNAL           ILAENV
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, DBLE, INT, LOG, MAX
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-*
-      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
-         INFO = -1
-      ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
-         INFO = -2
-      ELSE IF( N.LT.0 ) THEN
-         INFO = -3
-      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
-         INFO = -7
-      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
-         INFO = -9
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DLAED0', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( N.EQ.0 )
-     $   RETURN
-*
-      SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 )
-*
-*     Determine the size and placement of the submatrices, and save in
-*     the leading elements of IWORK.
-*
-      IWORK( 1 ) = N
-      SUBPBS = 1
-      TLVLS = 0
-   10 CONTINUE
-      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
-         DO 20 J = SUBPBS, 1, -1
-            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
-            IWORK( 2*J-1 ) = IWORK( J ) / 2
-   20    CONTINUE
-         TLVLS = TLVLS + 1
-         SUBPBS = 2*SUBPBS
-         GO TO 10
-      END IF
-      DO 30 J = 2, SUBPBS
-         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
-   30 CONTINUE
-*
-*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
-*     using rank-1 modifications (cuts).
-*
-      SPM1 = SUBPBS - 1
-      DO 40 I = 1, SPM1
-         SUBMAT = IWORK( I ) + 1
-         SMM1 = SUBMAT - 1
-         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
-         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
-   40 CONTINUE
-*
-      INDXQ = 4*N + 3
-      IF( ICOMPQ.NE.2 ) THEN
-*
-*        Set up workspaces for eigenvalues only/accumulate new vectors
-*        routine
-*
-         TEMP = LOG( DBLE( N ) ) / LOG( TWO )
-         LGN = INT( TEMP )
-         IF( 2**LGN.LT.N )
-     $      LGN = LGN + 1
-         IF( 2**LGN.LT.N )
-     $      LGN = LGN + 1
-         IPRMPT = INDXQ + N + 1
-         IPERM = IPRMPT + N*LGN
-         IQPTR = IPERM + N*LGN
-         IGIVPT = IQPTR + N + 2
-         IGIVCL = IGIVPT + N*LGN
-*
-         IGIVNM = 1
-         IQ = IGIVNM + 2*N*LGN
-         IWREM = IQ + N**2 + 1
-*
-*        Initialize pointers
-*
-         DO 50 I = 0, SUBPBS
-            IWORK( IPRMPT+I ) = 1
-            IWORK( IGIVPT+I ) = 1
-   50    CONTINUE
-         IWORK( IQPTR ) = 1
-      END IF
-*
-*     Solve each submatrix eigenproblem at the bottom of the divide and
-*     conquer tree.
-*
-      CURR = 0
-      DO 70 I = 0, SPM1
-         IF( I.EQ.0 ) THEN
-            SUBMAT = 1
-            MATSIZ = IWORK( 1 )
-         ELSE
-            SUBMAT = IWORK( I ) + 1
-            MATSIZ = IWORK( I+1 ) - IWORK( I )
-         END IF
-         IF( ICOMPQ.EQ.2 ) THEN
-            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
-     $                   Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
-            IF( INFO.NE.0 )
-     $         GO TO 130
-         ELSE
-            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
-     $                   WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
-     $                   INFO )
-            IF( INFO.NE.0 )
-     $         GO TO 130
-            IF( ICOMPQ.EQ.1 ) THEN
-               CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
-     $                     Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
-     $                     CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
-     $                     LDQS )
-            END IF
-            IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
-            CURR = CURR + 1
-         END IF
-         K = 1
-         DO 60 J = SUBMAT, IWORK( I+1 )
-            IWORK( INDXQ+J ) = K
-            K = K + 1
-   60    CONTINUE
-   70 CONTINUE
-*
-*     Successively merge eigensystems of adjacent submatrices
-*     into eigensystem for the corresponding larger matrix.
-*
-*     while ( SUBPBS > 1 )
-*
-      CURLVL = 1
-   80 CONTINUE
-      IF( SUBPBS.GT.1 ) THEN
-         SPM2 = SUBPBS - 2
-         DO 90 I = 0, SPM2, 2
-            IF( I.EQ.0 ) THEN
-               SUBMAT = 1
-               MATSIZ = IWORK( 2 )
-               MSD2 = IWORK( 1 )
-               CURPRB = 0
-            ELSE
-               SUBMAT = IWORK( I ) + 1
-               MATSIZ = IWORK( I+2 ) - IWORK( I )
-               MSD2 = MATSIZ / 2
-               CURPRB = CURPRB + 1
-            END IF
-*
-*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
-*     into an eigensystem of size MATSIZ.
-*     DLAED1 is used only for the full eigensystem of a tridiagonal
-*     matrix.
-*     DLAED7 handles the cases in which eigenvalues only or eigenvalues
-*     and eigenvectors of a full symmetric matrix (which was reduced to
-*     tridiagonal form) are desired.
-*
-            IF( ICOMPQ.EQ.2 ) THEN
-               CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
-     $                      LDQ, IWORK( INDXQ+SUBMAT ),
-     $                      E( SUBMAT+MSD2-1 ), MSD2, WORK,
-     $                      IWORK( SUBPBS+1 ), INFO )
-            ELSE
-               CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
-     $                      D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
-     $                      IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
-     $                      MSD2, WORK( IQ ), IWORK( IQPTR ),
-     $                      IWORK( IPRMPT ), IWORK( IPERM ),
-     $                      IWORK( IGIVPT ), IWORK( IGIVCL ),
-     $                      WORK( IGIVNM ), WORK( IWREM ),
-     $                      IWORK( SUBPBS+1 ), INFO )
-            END IF
-            IF( INFO.NE.0 )
-     $         GO TO 130
-            IWORK( I / 2+1 ) = IWORK( I+2 )
-   90    CONTINUE
-         SUBPBS = SUBPBS / 2
-         CURLVL = CURLVL + 1
-         GO TO 80
-      END IF
-*
-*     end while
-*
-*     Re-merge the eigenvalues/vectors which were deflated at the final
-*     merge step.
-*
-      IF( ICOMPQ.EQ.1 ) THEN
-         DO 100 I = 1, N
-            J = IWORK( INDXQ+I )
-            WORK( I ) = D( J )
-            CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
-  100    CONTINUE
-         CALL DCOPY( N, WORK, 1, D, 1 )
-      ELSE IF( ICOMPQ.EQ.2 ) THEN
-         DO 110 I = 1, N
-            J = IWORK( INDXQ+I )
-            WORK( I ) = D( J )
-            CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
-  110    CONTINUE
-         CALL DCOPY( N, WORK, 1, D, 1 )
-         CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
-      ELSE
-         DO 120 I = 1, N
-            J = IWORK( INDXQ+I )
-            WORK( I ) = D( J )
-  120    CONTINUE
-         CALL DCOPY( N, WORK, 1, D, 1 )
-      END IF
-      GO TO 140
-*
-  130 CONTINUE
-      INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
-*
-  140 CONTINUE
-      RETURN
-*
-*     End of DLAED0
-*
-      END
--- a/lib/linalg/dlaed1.cpp
+++ b/lib/linalg/dlaed1.cpp
@ -0,0 +1,90 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static integer c_n1 = -1;
+int dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
+            doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info)
+{
+    integer q_dim1, q_offset, i__1, i__2;
+    integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc;
+    extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
+    integer indxp;
+    extern int dlaed2_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
+                       integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+                       doublereal *, integer *, integer *, integer *, integer *, integer *),
+        dlaed3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
+                doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *,
+                doublereal *, integer *);
+    integer idlmda;
+    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
+        xerbla_(char *, integer *, ftnlen);
+    integer coltyp;
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --work;
+    --iwork;
+    *info = 0;
+    if (*n < 0) {
+        *info = -1;
+    } else if (*ldq < max(1, *n)) {
+        *info = -4;
+    } else {
+        i__1 = 1, i__2 = *n / 2;
+        if (min(i__1, i__2) > *cutpnt || *n / 2 < *cutpnt) {
+            *info = -7;
+        }
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DLAED1", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+    dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
+    zpp1 = *cutpnt + 1;
+    i__1 = *n - *cutpnt;
+    dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
+    dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[iz], &work[idlmda],
+            &work[iw], &work[iq2], &iwork[indx], &iwork[indxc], &iwork[indxp], &iwork[coltyp],
+            info);
+    if (*info != 0) {
+        goto L20;
+    }
+    if (k != 0) {
+        is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt +
+             (iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
+        dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2],
+                &iwork[indxc], &iwork[coltyp], &work[iw], &work[is], info);
+        if (*info != 0) {
+            goto L20;
+        }
+        n1 = k;
+        n2 = *n - k;
+        dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            indxq[i__] = i__;
+        }
+    }
+L20:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed1.f
+++ b/lib/linalg/dlaed1.f
@ -1,271 +0,0 @@
-*> \brief \b DLAED1 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED1 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
-*                          INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            CUTPNT, INFO, LDQ, N
-*       DOUBLE PRECISION   RHO
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            INDXQ( * ), IWORK( * )
-*       DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAED1 computes the updated eigensystem of a diagonal
-*> matrix after modification by a rank-one symmetric matrix.  This
-*> routine is used only for the eigenproblem which requires all
-*> eigenvalues and eigenvectors of a tridiagonal matrix.  DLAED7 handles
-*> the case in which eigenvalues only or eigenvalues and eigenvectors
-*> of a full symmetric matrix (which was reduced to tridiagonal form)
-*> are desired.
-*>
-*>   T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out)
-*>
-*>    where Z = Q**T*u, u is a vector of length N with ones in the
-*>    CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
-*>
-*>    The eigenvectors of the original matrix are stored in Q, and the
-*>    eigenvalues are in D.  The algorithm consists of three stages:
-*>
-*>       The first stage consists of deflating the size of the problem
-*>       when there are multiple eigenvalues or if there is a zero in
-*>       the Z vector.  For each such occurrence the dimension of the
-*>       secular equation problem is reduced by one.  This stage is
-*>       performed by the routine DLAED2.
-*>
-*>       The second stage consists of calculating the updated
-*>       eigenvalues. This is done by finding the roots of the secular
-*>       equation via the routine DLAED4 (as called by DLAED3).
-*>       This routine also calculates the eigenvectors of the current
-*>       problem.
-*>
-*>       The final stage consists of computing the updated eigenvectors
-*>       directly using the updated eigenvalues.  The eigenvectors for
-*>       the current problem are multiplied with the eigenvectors from
-*>       the overall problem.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>         On entry, the eigenvalues of the rank-1-perturbed matrix.
-*>         On exit, the eigenvalues of the repaired matrix.
-*> \endverbatim
-*>
-*> \param[in,out] Q
-*> \verbatim
-*>          Q is DOUBLE PRECISION array, dimension (LDQ,N)
-*>         On entry, the eigenvectors of the rank-1-perturbed matrix.
-*>         On exit, the eigenvectors of the repaired tridiagonal matrix.
-*> \endverbatim
-*>
-*> \param[in] LDQ
-*> \verbatim
-*>          LDQ is INTEGER
-*>         The leading dimension of the array Q.  LDQ >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in,out] INDXQ
-*> \verbatim
-*>          INDXQ is INTEGER array, dimension (N)
-*>         On entry, the permutation which separately sorts the two
-*>         subproblems in D into ascending order.
-*>         On exit, the permutation which will reintegrate the
-*>         subproblems back into sorted order,
-*>         i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
-*> \endverbatim
-*>
-*> \param[in] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>         The subdiagonal entry used to create the rank-1 modification.
-*> \endverbatim
-*>
-*> \param[in] CUTPNT
-*> \verbatim
-*>          CUTPNT is INTEGER
-*>         The location of the last eigenvalue in the leading sub-matrix.
-*>         min(1,N) <= CUTPNT <= N/2.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is DOUBLE PRECISION array, dimension (4*N + N**2)
-*> \endverbatim
-*>
-*> \param[out] IWORK
-*> \verbatim
-*>          IWORK is INTEGER array, dimension (4*N)
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit.
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*>          > 0:  if INFO = 1, an eigenvalue did not converge
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*> Jeff Rutter, Computer Science Division, University of California
-*> at Berkeley, USA \n
-*>  Modified by Francoise Tisseur, University of Tennessee
-*>
-*  =====================================================================
-      SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
-     $                   INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            CUTPNT, INFO, LDQ, N
-      DOUBLE PRECISION   RHO
-*     ..
-*     .. Array Arguments ..
-      INTEGER            INDXQ( * ), IWORK( * )
-      DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER            COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS,
-     $                   IW, IZ, K, N1, N2, ZPP1
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, MIN
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-*
-      IF( N.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
-         INFO = -4
-      ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
-         INFO = -7
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DLAED1', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( N.EQ.0 )
-     $   RETURN
-*
-*     The following values are integer pointers which indicate
-*     the portion of the workspace
-*     used by a particular array in DLAED2 and DLAED3.
-*
-      IZ = 1
-      IDLMDA = IZ + N
-      IW = IDLMDA + N
-      IQ2 = IW + N
-*
-      INDX = 1
-      INDXC = INDX + N
-      COLTYP = INDXC + N
-      INDXP = COLTYP + N
-*
-*
-*     Form the z-vector which consists of the last row of Q_1 and the
-*     first row of Q_2.
-*
-      CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
-      ZPP1 = CUTPNT + 1
-      CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
-*
-*     Deflate eigenvalues.
-*
-      CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
-     $             WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
-     $             IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
-     $             IWORK( COLTYP ), INFO )
-*
-      IF( INFO.NE.0 )
-     $   GO TO 20
-*
-*     Solve Secular Equation.
-*
-      IF( K.NE.0 ) THEN
-         IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
-     $        ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
-         CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
-     $                WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
-     $                WORK( IW ), WORK( IS ), INFO )
-         IF( INFO.NE.0 )
-     $      GO TO 20
-*
-*     Prepare the INDXQ sorting permutation.
-*
-         N1 = K
-         N2 = N - K
-         CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
-      ELSE
-         DO 10 I = 1, N
-            INDXQ( I ) = I
-   10    CONTINUE
-      END IF
-*
-   20 CONTINUE
-      RETURN
-*
-*     End of DLAED1
-*
-      END
--- a/lib/linalg/dlaed2.cpp
+++ b/lib/linalg/dlaed2.cpp
@ -0,0 +1,263 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static doublereal c_b3 = -1.;
+static integer c__1 = 1;
+int dlaed2_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
+            integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w,
+            doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp,
+            integer *info)
+{
+    integer q_dim1, q_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3, d__4;
+    double sqrt(doublereal);
+    doublereal c__;
+    integer i__, j;
+    doublereal s, t;
+    integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
+    doublereal eps, tau, tol;
+    integer psm[4], imax, jmax;
+    extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
+                     doublereal *);
+    integer ctot[4];
+    extern int dscal_(integer *, doublereal *, doublereal *, integer *),
+        dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
+    extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
+        dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
+                ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --z__;
+    --dlamda;
+    --w;
+    --q2;
+    --indx;
+    --indxc;
+    --indxp;
+    --coltyp;
+    *info = 0;
+    if (*n < 0) {
+        *info = -2;
+    } else if (*ldq < max(1, *n)) {
+        *info = -6;
+    } else {
+        i__1 = 1, i__2 = *n / 2;
+        if (min(i__1, i__2) > *n1 || *n / 2 < *n1) {
+            *info = -3;
+        }
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DLAED2", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    n2 = *n - *n1;
+    n1p1 = *n1 + 1;
+    if (*rho < 0.) {
+        dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
+    }
+    t = 1. / sqrt(2.);
+    dscal_(n, &t, &z__[1], &c__1);
+    *rho = (d__1 = *rho * 2., abs(d__1));
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+        indxq[i__] += *n1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        dlamda[i__] = d__[indxq[i__]];
+    }
+    dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        indx[i__] = indxq[indxc[i__]];
+    }
+    imax = idamax_(n, &z__[1], &c__1);
+    jmax = idamax_(n, &d__[1], &c__1);
+    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
+    d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2));
+    tol = eps * 8. * max(d__3, d__4);
+    if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
+        *k = 0;
+        iq2 = 1;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            i__ = indx[j];
+            dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+            dlamda[j] = d__[i__];
+            iq2 += *n;
+        }
+        dlacpy_((char *)"A", n, n, &q2[1], n, &q[q_offset], ldq, (ftnlen)1);
+        dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
+        goto L190;
+    }
+    i__1 = *n1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        coltyp[i__] = 1;
+    }
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+        coltyp[i__] = 3;
+    }
+    *k = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+        nj = indx[j];
+        if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
+            --k2;
+            coltyp[nj] = 4;
+            indxp[k2] = nj;
+            if (j == *n) {
+                goto L100;
+            }
+        } else {
+            pj = nj;
+            goto L80;
+        }
+    }
+L80:
+    ++j;
+    nj = indx[j];
+    if (j > *n) {
+        goto L100;
+    }
+    if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
+        --k2;
+        coltyp[nj] = 4;
+        indxp[k2] = nj;
+    } else {
+        s = z__[pj];
+        c__ = z__[nj];
+        tau = dlapy2_(&c__, &s);
+        t = d__[nj] - d__[pj];
+        c__ /= tau;
+        s = -s / tau;
+        if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
+            z__[nj] = tau;
+            z__[pj] = 0.;
+            if (coltyp[nj] != coltyp[pj]) {
+                coltyp[nj] = 2;
+            }
+            coltyp[pj] = 4;
+            drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &c__, &s);
+            d__1 = c__;
+            d__2 = s;
+            t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
+            d__1 = s;
+            d__2 = c__;
+            d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
+            d__[pj] = t;
+            --k2;
+            i__ = 1;
+        L90:
+            if (k2 + i__ <= *n) {
+                if (d__[pj] < d__[indxp[k2 + i__]]) {
+                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+                    indxp[k2 + i__] = pj;
+                    ++i__;
+                    goto L90;
+                } else {
+                    indxp[k2 + i__ - 1] = pj;
+                }
+            } else {
+                indxp[k2 + i__ - 1] = pj;
+            }
+            pj = nj;
+        } else {
+            ++(*k);
+            dlamda[*k] = d__[pj];
+            w[*k] = z__[pj];
+            indxp[*k] = pj;
+            pj = nj;
+        }
+    }
+    goto L80;
+L100:
+    ++(*k);
+    dlamda[*k] = d__[pj];
+    w[*k] = z__[pj];
+    indxp[*k] = pj;
+    for (j = 1; j <= 4; ++j) {
+        ctot[j - 1] = 0;
+    }
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+        ct = coltyp[j];
+        ++ctot[ct - 1];
+    }
+    psm[0] = 1;
+    psm[1] = ctot[0] + 1;
+    psm[2] = psm[1] + ctot[1];
+    psm[3] = psm[2] + ctot[2];
+    *k = *n - ctot[3];
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+        js = indxp[j];
+        ct = coltyp[js];
+        indx[psm[ct - 1]] = js;
+        indxc[psm[ct - 1]] = j;
+        ++psm[ct - 1];
+    }
+    i__ = 1;
+    iq1 = 1;
+    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
+    i__1 = ctot[0];
+    for (j = 1; j <= i__1; ++j) {
+        js = indx[i__];
+        dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+        z__[i__] = d__[js];
+        ++i__;
+        iq1 += *n1;
+    }
+    i__1 = ctot[1];
+    for (j = 1; j <= i__1; ++j) {
+        js = indx[i__];
+        dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+        dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+        z__[i__] = d__[js];
+        ++i__;
+        iq1 += *n1;
+        iq2 += n2;
+    }
+    i__1 = ctot[2];
+    for (j = 1; j <= i__1; ++j) {
+        js = indx[i__];
+        dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+        z__[i__] = d__[js];
+        ++i__;
+        iq2 += n2;
+    }
+    iq1 = iq2;
+    i__1 = ctot[3];
+    for (j = 1; j <= i__1; ++j) {
+        js = indx[i__];
+        dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+        iq2 += *n;
+        z__[i__] = d__[js];
+        ++i__;
+    }
+    if (*k < *n) {
+        dlacpy_((char *)"A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq, (ftnlen)1);
+        i__1 = *n - *k;
+        dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
+    }
+    for (j = 1; j <= 4; ++j) {
+        coltyp[j] = ctot[j - 1];
+    }
+L190:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed2.f
+++ b/lib/linalg/dlaed2.f
@ -1,536 +0,0 @@
-*> \brief \b DLAED2 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED2 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
-*                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, K, LDQ, N, N1
-*       DOUBLE PRECISION   RHO
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
-*      $                   INDXQ( * )
-*       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
-*      $                   W( * ), Z( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAED2 merges the two sets of eigenvalues together into a single
-*> sorted set.  Then it tries to deflate the size of the problem.
-*> There are two ways in which deflation can occur:  when two or more
-*> eigenvalues are close together or if there is a tiny entry in the
-*> Z vector.  For each such occurrence the order of the related secular
-*> equation problem is reduced by one.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[out] K
-*> \verbatim
-*>          K is INTEGER
-*>         The number of non-deflated eigenvalues, and the order of the
-*>         related secular equation. 0 <= K <=N.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
-*> \endverbatim
-*>
-*> \param[in] N1
-*> \verbatim
-*>          N1 is INTEGER
-*>         The location of the last eigenvalue in the leading sub-matrix.
-*>         min(1,N) <= N1 <= N/2.
-*> \endverbatim
-*>
-*> \param[in,out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>         On entry, D contains the eigenvalues of the two submatrices to
-*>         be combined.
-*>         On exit, D contains the trailing (N-K) updated eigenvalues
-*>         (those which were deflated) sorted into increasing order.
-*> \endverbatim
-*>
-*> \param[in,out] Q
-*> \verbatim
-*>          Q is DOUBLE PRECISION array, dimension (LDQ, N)
-*>         On entry, Q contains the eigenvectors of two submatrices in
-*>         the two square blocks with corners at (1,1), (N1,N1)
-*>         and (N1+1, N1+1), (N,N).
-*>         On exit, Q contains the trailing (N-K) updated eigenvectors
-*>         (those which were deflated) in its last N-K columns.
-*> \endverbatim
-*>
-*> \param[in] LDQ
-*> \verbatim
-*>          LDQ is INTEGER
-*>         The leading dimension of the array Q.  LDQ >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in,out] INDXQ
-*> \verbatim
-*>          INDXQ is INTEGER array, dimension (N)
-*>         The permutation which separately sorts the two sub-problems
-*>         in D into ascending order.  Note that elements in the second
-*>         half of this permutation must first have N1 added to their
-*>         values. Destroyed on exit.
-*> \endverbatim
-*>
-*> \param[in,out] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>         On entry, the off-diagonal element associated with the rank-1
-*>         cut which originally split the two submatrices which are now
-*>         being recombined.
-*>         On exit, RHO has been modified to the value required by
-*>         DLAED3.
-*> \endverbatim
-*>
-*> \param[in] Z
-*> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension (N)
-*>         On entry, Z contains the updating vector (the last
-*>         row of the first sub-eigenvector matrix and the first row of
-*>         the second sub-eigenvector matrix).
-*>         On exit, the contents of Z have been destroyed by the updating
-*>         process.
-*> \endverbatim
-*>
-*> \param[out] DLAMDA
-*> \verbatim
-*>          DLAMDA is DOUBLE PRECISION array, dimension (N)
-*>         A copy of the first K eigenvalues which will be used by
-*>         DLAED3 to form the secular equation.
-*> \endverbatim
-*>
-*> \param[out] W
-*> \verbatim
-*>          W is DOUBLE PRECISION array, dimension (N)
-*>         The first k values of the final deflation-altered z-vector
-*>         which will be passed to DLAED3.
-*> \endverbatim
-*>
-*> \param[out] Q2
-*> \verbatim
-*>          Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
-*>         A copy of the first K eigenvectors which will be used by
-*>         DLAED3 in a matrix multiply (DGEMM) to solve for the new
-*>         eigenvectors.
-*> \endverbatim
-*>
-*> \param[out] INDX
-*> \verbatim
-*>          INDX is INTEGER array, dimension (N)
-*>         The permutation used to sort the contents of DLAMDA into
-*>         ascending order.
-*> \endverbatim
-*>
-*> \param[out] INDXC
-*> \verbatim
-*>          INDXC is INTEGER array, dimension (N)
-*>         The permutation used to arrange the columns of the deflated
-*>         Q matrix into three groups:  the first group contains non-zero
-*>         elements only at and above N1, the second contains
-*>         non-zero elements only below N1, and the third is dense.
-*> \endverbatim
-*>
-*> \param[out] INDXP
-*> \verbatim
-*>          INDXP is INTEGER array, dimension (N)
-*>         The permutation used to place deflated values of D at the end
-*>         of the array.  INDXP(1:K) points to the nondeflated D-values
-*>         and INDXP(K+1:N) points to the deflated eigenvalues.
-*> \endverbatim
-*>
-*> \param[out] COLTYP
-*> \verbatim
-*>          COLTYP is INTEGER array, dimension (N)
-*>         During execution, a label which will indicate which of the
-*>         following types a column in the Q2 matrix is:
-*>         1 : non-zero in the upper half only;
-*>         2 : dense;
-*>         3 : non-zero in the lower half only;
-*>         4 : deflated.
-*>         On exit, COLTYP(i) is the number of columns of type i,
-*>         for i=1 to 4 only.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit.
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*> Jeff Rutter, Computer Science Division, University of California
-*> at Berkeley, USA \n
-*>  Modified by Francoise Tisseur, University of Tennessee
-*>
-*  =====================================================================
-      SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
-     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, K, LDQ, N, N1
-      DOUBLE PRECISION   RHO
-*     ..
-*     .. Array Arguments ..
-      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
-     $                   INDXQ( * )
-      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
-     $                   W( * ), Z( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
-      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
-     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
-*     ..
-*     .. Local Arrays ..
-      INTEGER            CTOT( 4 ), PSM( 4 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
-     $                   N2, NJ, PJ
-      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
-*     ..
-*     .. External Functions ..
-      INTEGER            IDAMAX
-      DOUBLE PRECISION   DLAMCH, DLAPY2
-      EXTERNAL           IDAMAX, DLAMCH, DLAPY2
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-*
-      IF( N.LT.0 ) THEN
-         INFO = -2
-      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
-         INFO = -6
-      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
-         INFO = -3
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DLAED2', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( N.EQ.0 )
-     $   RETURN
-*
-      N2 = N - N1
-      N1P1 = N1 + 1
-*
-      IF( RHO.LT.ZERO ) THEN
-         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
-      END IF
-*
-*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
-*     two normalized vectors, norm2(z) = sqrt(2).
-*
-      T = ONE / SQRT( TWO )
-      CALL DSCAL( N, T, Z, 1 )
-*
-*     RHO = ABS( norm(z)**2 * RHO )
-*
-      RHO = ABS( TWO*RHO )
-*
-*     Sort the eigenvalues into increasing order
-*
-      DO 10 I = N1P1, N
-         INDXQ( I ) = INDXQ( I ) + N1
-   10 CONTINUE
-*
-*     re-integrate the deflated parts from the last pass
-*
-      DO 20 I = 1, N
-         DLAMDA( I ) = D( INDXQ( I ) )
-   20 CONTINUE
-      CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
-      DO 30 I = 1, N
-         INDX( I ) = INDXQ( INDXC( I ) )
-   30 CONTINUE
-*
-*     Calculate the allowable deflation tolerance
-*
-      IMAX = IDAMAX( N, Z, 1 )
-      JMAX = IDAMAX( N, D, 1 )
-      EPS = DLAMCH( 'Epsilon' )
-      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
-*
-*     If the rank-1 modifier is small enough, no more needs to be done
-*     except to reorganize Q so that its columns correspond with the
-*     elements in D.
-*
-      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
-         K = 0
-         IQ2 = 1
-         DO 40 J = 1, N
-            I = INDX( J )
-            CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
-            DLAMDA( J ) = D( I )
-            IQ2 = IQ2 + N
-   40    CONTINUE
-         CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
-         CALL DCOPY( N, DLAMDA, 1, D, 1 )
-         GO TO 190
-      END IF
-*
-*     If there are multiple eigenvalues then the problem deflates.  Here
-*     the number of equal eigenvalues are found.  As each equal
-*     eigenvalue is found, an elementary reflector is computed to rotate
-*     the corresponding eigensubspace so that the corresponding
-*     components of Z are zero in this new basis.
-*
-      DO 50 I = 1, N1
-         COLTYP( I ) = 1
-   50 CONTINUE
-      DO 60 I = N1P1, N
-         COLTYP( I ) = 3
-   60 CONTINUE
-*
-*
-      K = 0
-      K2 = N + 1
-      DO 70 J = 1, N
-         NJ = INDX( J )
-         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
-*
-*           Deflate due to small z component.
-*
-            K2 = K2 - 1
-            COLTYP( NJ ) = 4
-            INDXP( K2 ) = NJ
-            IF( J.EQ.N )
-     $         GO TO 100
-         ELSE
-            PJ = NJ
-            GO TO 80
-         END IF
-   70 CONTINUE
-   80 CONTINUE
-      J = J + 1
-      NJ = INDX( J )
-      IF( J.GT.N )
-     $   GO TO 100
-      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
-*
-*        Deflate due to small z component.
-*
-         K2 = K2 - 1
-         COLTYP( NJ ) = 4
-         INDXP( K2 ) = NJ
-      ELSE
-*
-*        Check if eigenvalues are close enough to allow deflation.
-*
-         S = Z( PJ )
-         C = Z( NJ )
-*
-*        Find sqrt(a**2+b**2) without overflow or
-*        destructive underflow.
-*
-         TAU = DLAPY2( C, S )
-         T = D( NJ ) - D( PJ )
-         C = C / TAU
-         S = -S / TAU
-         IF( ABS( T*C*S ).LE.TOL ) THEN
-*
-*           Deflation is possible.
-*
-            Z( NJ ) = TAU
-            Z( PJ ) = ZERO
-            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
-     $         COLTYP( NJ ) = 2
-            COLTYP( PJ ) = 4
-            CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
-            T = D( PJ )*C**2 + D( NJ )*S**2
-            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
-            D( PJ ) = T
-            K2 = K2 - 1
-            I = 1
-   90       CONTINUE
-            IF( K2+I.LE.N ) THEN
-               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
-                  INDXP( K2+I-1 ) = INDXP( K2+I )
-                  INDXP( K2+I ) = PJ
-                  I = I + 1
-                  GO TO 90
-               ELSE
-                  INDXP( K2+I-1 ) = PJ
-               END IF
-            ELSE
-               INDXP( K2+I-1 ) = PJ
-            END IF
-            PJ = NJ
-         ELSE
-            K = K + 1
-            DLAMDA( K ) = D( PJ )
-            W( K ) = Z( PJ )
-            INDXP( K ) = PJ
-            PJ = NJ
-         END IF
-      END IF
-      GO TO 80
-  100 CONTINUE
-*
-*     Record the last eigenvalue.
-*
-      K = K + 1
-      DLAMDA( K ) = D( PJ )
-      W( K ) = Z( PJ )
-      INDXP( K ) = PJ
-*
-*     Count up the total number of the various types of columns, then
-*     form a permutation which positions the four column types into
-*     four uniform groups (although one or more of these groups may be
-*     empty).
-*
-      DO 110 J = 1, 4
-         CTOT( J ) = 0
-  110 CONTINUE
-      DO 120 J = 1, N
-         CT = COLTYP( J )
-         CTOT( CT ) = CTOT( CT ) + 1
-  120 CONTINUE
-*
-*     PSM(*) = Position in SubMatrix (of types 1 through 4)
-*
-      PSM( 1 ) = 1
-      PSM( 2 ) = 1 + CTOT( 1 )
-      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
-      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
-      K = N - CTOT( 4 )
-*
-*     Fill out the INDXC array so that the permutation which it induces
-*     will place all type-1 columns first, all type-2 columns next,
-*     then all type-3's, and finally all type-4's.
-*
-      DO 130 J = 1, N
-         JS = INDXP( J )
-         CT = COLTYP( JS )
-         INDX( PSM( CT ) ) = JS
-         INDXC( PSM( CT ) ) = J
-         PSM( CT ) = PSM( CT ) + 1
-  130 CONTINUE
-*
-*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
-*     and Q2 respectively.  The eigenvalues/vectors which were not
-*     deflated go into the first K slots of DLAMDA and Q2 respectively,
-*     while those which were deflated go into the last N - K slots.
-*
-      I = 1
-      IQ1 = 1
-      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
-      DO 140 J = 1, CTOT( 1 )
-         JS = INDX( I )
-         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
-         Z( I ) = D( JS )
-         I = I + 1
-         IQ1 = IQ1 + N1
-  140 CONTINUE
-*
-      DO 150 J = 1, CTOT( 2 )
-         JS = INDX( I )
-         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
-         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
-         Z( I ) = D( JS )
-         I = I + 1
-         IQ1 = IQ1 + N1
-         IQ2 = IQ2 + N2
-  150 CONTINUE
-*
-      DO 160 J = 1, CTOT( 3 )
-         JS = INDX( I )
-         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
-         Z( I ) = D( JS )
-         I = I + 1
-         IQ2 = IQ2 + N2
-  160 CONTINUE
-*
-      IQ1 = IQ2
-      DO 170 J = 1, CTOT( 4 )
-         JS = INDX( I )
-         CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
-         IQ2 = IQ2 + N
-         Z( I ) = D( JS )
-         I = I + 1
-  170 CONTINUE
-*
-*     The deflated eigenvalues and their corresponding vectors go back
-*     into the last N - K slots of D and Q respectively.
-*
-      IF( K.LT.N ) THEN
-         CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
-     $                Q( 1, K+1 ), LDQ )
-         CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
-      END IF
-*
-*     Copy CTOT into COLTYP for referencing in DLAED3.
-*
-      DO 180 J = 1, 4
-         COLTYP( J ) = CTOT( J )
-  180 CONTINUE
-*
-  190 CONTINUE
-      RETURN
-*
-*     End of DLAED2
-*
-      END
--- a/lib/linalg/dlaed3.cpp
+++ b/lib/linalg/dlaed3.cpp
@ -0,0 +1,138 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__1 = 1;
+static doublereal c_b22 = 1.;
+static doublereal c_b23 = 0.;
+int dlaed3_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
+            doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot,
+            doublereal *w, doublereal *s, integer *info)
+{
+    integer q_dim1, q_offset, i__1, i__2;
+    doublereal d__1;
+    double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
+    integer i__, j, n2, n12, ii, n23, iq2;
+    doublereal temp;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen),
+        dcopy_(integer *, doublereal *, integer *, doublereal *, integer *),
+        dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+                doublereal *, integer *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
+                       integer *, ftnlen),
+        dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
+                ftnlen),
+        xerbla_(char *, integer *, ftnlen);
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dlamda;
+    --q2;
+    --indx;
+    --ctot;
+    --w;
+    --s;
+    *info = 0;
+    if (*k < 0) {
+        *info = -1;
+    } else if (*n < *k) {
+        *info = -2;
+    } else if (*ldq < max(1, *n)) {
+        *info = -6;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DLAED3", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*k == 0) {
+        return 0;
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
+    }
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+        dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info);
+        if (*info != 0) {
+            goto L120;
+        }
+    }
+    if (*k == 1) {
+        goto L110;
+    }
+    if (*k == 2) {
+        i__1 = *k;
+        for (j = 1; j <= i__1; ++j) {
+            w[1] = q[j * q_dim1 + 1];
+            w[2] = q[j * q_dim1 + 2];
+            ii = indx[1];
+            q[j * q_dim1 + 1] = w[ii];
+            ii = indx[2];
+            q[j * q_dim1 + 2] = w[ii];
+        }
+        goto L110;
+    }
+    dcopy_(k, &w[1], &c__1, &s[1], &c__1);
+    i__1 = *ldq + 1;
+    dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+        i__2 = j - 1;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+            w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+        }
+        i__2 = *k;
+        for (i__ = j + 1; i__ <= i__2; ++i__) {
+            w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+        }
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        d__1 = sqrt(-w[i__]);
+        w[i__] = d_lmp_sign(&d__1, &s[i__]);
+    }
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+        i__2 = *k;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+            s[i__] = w[i__] / q[i__ + j * q_dim1];
+        }
+        temp = dnrm2_(k, &s[1], &c__1);
+        i__2 = *k;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+            ii = indx[i__];
+            q[i__ + j * q_dim1] = s[ii] / temp;
+        }
+    }
+L110:
+    n2 = *n - *n1;
+    n12 = ctot[1] + ctot[2];
+    n23 = ctot[2] + ctot[3];
+    dlacpy_((char *)"A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23, (ftnlen)1);
+    iq2 = *n1 * n12 + 1;
+    if (n23 != 0) {
+        dgemm_((char *)"N", (char *)"N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &c_b23,
+               &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1, (ftnlen)1);
+    } else {
+        dlaset_((char *)"A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1);
+    }
+    dlacpy_((char *)"A", &n12, k, &q[q_offset], ldq, &s[1], &n12, (ftnlen)1);
+    if (n12 != 0) {
+        dgemm_((char *)"N", (char *)"N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq,
+               (ftnlen)1, (ftnlen)1);
+    } else {
+        dlaset_((char *)"A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq, (ftnlen)1);
+    }
+L120:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed3.f
+++ b/lib/linalg/dlaed3.f
@ -1,350 +0,0 @@
-*> \brief \b DLAED3 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED3 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
-*                          CTOT, W, S, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INFO, K, LDQ, N, N1
-*       DOUBLE PRECISION   RHO
-*       ..
-*       .. Array Arguments ..
-*       INTEGER            CTOT( * ), INDX( * )
-*       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
-*      $                   S( * ), W( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAED3 finds the roots of the secular equation, as defined by the
-*> values in D, W, and RHO, between 1 and K.  It makes the
-*> appropriate calls to DLAED4 and then updates the eigenvectors by
-*> multiplying the matrix of eigenvectors of the pair of eigensystems
-*> being combined by the matrix of eigenvectors of the K-by-K system
-*> which is solved here.
-*>
-*> This code makes very mild assumptions about floating point
-*> arithmetic. It will work on machines with a guard digit in
-*> add/subtract, or on those binary machines without guard digits
-*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
-*> It could conceivably fail on hexadecimal or decimal machines
-*> without guard digits, but we know of none.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of terms in the rational function to be solved by
-*>          DLAED4.  K >= 0.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of rows and columns in the Q matrix.
-*>          N >= K (deflation may result in N>K).
-*> \endverbatim
-*>
-*> \param[in] N1
-*> \verbatim
-*>          N1 is INTEGER
-*>          The location of the last eigenvalue in the leading submatrix.
-*>          min(1,N) <= N1 <= N/2.
-*> \endverbatim
-*>
-*> \param[out] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>          D(I) contains the updated eigenvalues for
-*>          1 <= I <= K.
-*> \endverbatim
-*>
-*> \param[out] Q
-*> \verbatim
-*>          Q is DOUBLE PRECISION array, dimension (LDQ,N)
-*>          Initially the first K columns are used as workspace.
-*>          On output the columns 1 to K contain
-*>          the updated eigenvectors.
-*> \endverbatim
-*>
-*> \param[in] LDQ
-*> \verbatim
-*>          LDQ is INTEGER
-*>          The leading dimension of the array Q.  LDQ >= max(1,N).
-*> \endverbatim
-*>
-*> \param[in] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>          The value of the parameter in the rank one update equation.
-*>          RHO >= 0 required.
-*> \endverbatim
-*>
-*> \param[in,out] DLAMDA
-*> \verbatim
-*>          DLAMDA is DOUBLE PRECISION array, dimension (K)
-*>          The first K elements of this array contain the old roots
-*>          of the deflated updating problem.  These are the poles
-*>          of the secular equation. May be changed on output by
-*>          having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
-*>          Cray-2, or Cray C-90, as described above.
-*> \endverbatim
-*>
-*> \param[in] Q2
-*> \verbatim
-*>          Q2 is DOUBLE PRECISION array, dimension (LDQ2*N)
-*>          The first K columns of this matrix contain the non-deflated
-*>          eigenvectors for the split problem.
-*> \endverbatim
-*>
-*> \param[in] INDX
-*> \verbatim
-*>          INDX is INTEGER array, dimension (N)
-*>          The permutation used to arrange the columns of the deflated
-*>          Q matrix into three groups (see DLAED2).
-*>          The rows of the eigenvectors found by DLAED4 must be likewise
-*>          permuted before the matrix multiply can take place.
-*> \endverbatim
-*>
-*> \param[in] CTOT
-*> \verbatim
-*>          CTOT is INTEGER array, dimension (4)
-*>          A count of the total number of the various types of columns
-*>          in Q, as described in INDX.  The fourth column type is any
-*>          column which has been deflated.
-*> \endverbatim
-*>
-*> \param[in,out] W
-*> \verbatim
-*>          W is DOUBLE PRECISION array, dimension (K)
-*>          The first K elements of this array contain the components
-*>          of the deflation-adjusted updating vector. Destroyed on
-*>          output.
-*> \endverbatim
-*>
-*> \param[out] S
-*> \verbatim
-*>          S is DOUBLE PRECISION array, dimension (N1 + 1)*K
-*>          Will contain the eigenvectors of the repaired matrix which
-*>          will be multiplied by the previously accumulated eigenvectors
-*>          to update the system.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          = 0:  successful exit.
-*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*>          > 0:  if INFO = 1, an eigenvalue did not converge
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*> Jeff Rutter, Computer Science Division, University of California
-*> at Berkeley, USA \n
-*>  Modified by Francoise Tisseur, University of Tennessee
-*>
-*  =====================================================================
-      SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
-     $                   CTOT, W, S, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            INFO, K, LDQ, N, N1
-      DOUBLE PRECISION   RHO
-*     ..
-*     .. Array Arguments ..
-      INTEGER            CTOT( * ), INDX( * )
-      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
-     $                   S( * ), W( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ONE, ZERO
-      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, II, IQ2, J, N12, N2, N23
-      DOUBLE PRECISION   TEMP
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMC3, DNRM2
-      EXTERNAL           DLAMC3, DNRM2
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MAX, SIGN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-*
-      IF( K.LT.0 ) THEN
-         INFO = -1
-      ELSE IF( N.LT.K ) THEN
-         INFO = -2
-      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
-         INFO = -6
-      END IF
-      IF( INFO.NE.0 ) THEN
-         CALL XERBLA( 'DLAED3', -INFO )
-         RETURN
-      END IF
-*
-*     Quick return if possible
-*
-      IF( K.EQ.0 )
-     $   RETURN
-*
-*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
-*     be computed with high relative accuracy (barring over/underflow).
-*     This is a problem on machines without a guard digit in
-*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
-*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
-*     which on any of these machines zeros out the bottommost
-*     bit of DLAMDA(I) if it is 1; this makes the subsequent
-*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
-*     occurs. On binary machines with a guard digit (almost all
-*     machines) it does not change DLAMDA(I) at all. On hexadecimal
-*     and decimal machines with a guard digit, it slightly
-*     changes the bottommost bits of DLAMDA(I). It does not account
-*     for hexadecimal or decimal machines without guard digits
-*     (we know of none). We use a subroutine call to compute
-*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
-*     this code.
-*
-      DO 10 I = 1, K
-         DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
-   10 CONTINUE
-*
-      DO 20 J = 1, K
-         CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
-*
-*        If the zero finder fails, the computation is terminated.
-*
-         IF( INFO.NE.0 )
-     $      GO TO 120
-   20 CONTINUE
-*
-      IF( K.EQ.1 )
-     $   GO TO 110
-      IF( K.EQ.2 ) THEN
-         DO 30 J = 1, K
-            W( 1 ) = Q( 1, J )
-            W( 2 ) = Q( 2, J )
-            II = INDX( 1 )
-            Q( 1, J ) = W( II )
-            II = INDX( 2 )
-            Q( 2, J ) = W( II )
-   30    CONTINUE
-         GO TO 110
-      END IF
-*
-*     Compute updated W.
-*
-      CALL DCOPY( K, W, 1, S, 1 )
-*
-*     Initialize W(I) = Q(I,I)
-*
-      CALL DCOPY( K, Q, LDQ+1, W, 1 )
-      DO 60 J = 1, K
-         DO 40 I = 1, J - 1
-            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
-   40    CONTINUE
-         DO 50 I = J + 1, K
-            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
-   50    CONTINUE
-   60 CONTINUE
-      DO 70 I = 1, K
-         W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
-   70 CONTINUE
-*
-*     Compute eigenvectors of the modified rank-1 modification.
-*
-      DO 100 J = 1, K
-         DO 80 I = 1, K
-            S( I ) = W( I ) / Q( I, J )
-   80    CONTINUE
-         TEMP = DNRM2( K, S, 1 )
-         DO 90 I = 1, K
-            II = INDX( I )
-            Q( I, J ) = S( II ) / TEMP
-   90    CONTINUE
-  100 CONTINUE
-*
-*     Compute the updated eigenvectors.
-*
-  110 CONTINUE
-*
-      N2 = N - N1
-      N12 = CTOT( 1 ) + CTOT( 2 )
-      N23 = CTOT( 2 ) + CTOT( 3 )
-*
-      CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
-      IQ2 = N1*N12 + 1
-      IF( N23.NE.0 ) THEN
-         CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
-     $               ZERO, Q( N1+1, 1 ), LDQ )
-      ELSE
-         CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
-      END IF
-*
-      CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
-      IF( N12.NE.0 ) THEN
-         CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
-     $               LDQ )
-      ELSE
-         CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
-      END IF
-*
-*
-  120 CONTINUE
-      RETURN
-*
-*     End of DLAED3
-*
-      END
--- a/lib/linalg/dlaed4.cpp
+++ b/lib/linalg/dlaed4.cpp
@ -0,0 +1,571 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta,
+            doublereal *rho, doublereal *dlam, integer *info)
+{
+    integer i__1;
+    doublereal d__1;
+    double sqrt(doublereal);
+    doublereal a, b, c__;
+    integer j;
+    doublereal w;
+    integer ii;
+    doublereal dw, zz[3];
+    integer ip1;
+    doublereal del, eta, phi, eps, tau, psi;
+    integer iim1, iip1;
+    doublereal dphi, dpsi;
+    integer iter;
+    doublereal temp, prew, temp1, dltlb, dltub, midpt;
+    integer niter;
+    logical swtch;
+    extern int dlaed5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+                       doublereal *),
+        dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *,
+                doublereal *, integer *);
+    logical swtch3;
+    extern doublereal dlamch_(char *, ftnlen);
+    logical orgati;
+    doublereal erretm, rhoinv;
+    --delta;
+    --z__;
+    --d__;
+    *info = 0;
+    if (*n == 1) {
+        *dlam = d__[1] + *rho * z__[1] * z__[1];
+        delta[1] = 1.;
+        return 0;
+    }
+    if (*n == 2) {
+        dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
+        return 0;
+    }
+    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
+    rhoinv = 1. / *rho;
+    if (*i__ == *n) {
+        ii = *n - 1;
+        niter = 1;
+        midpt = *rho / 2.;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            delta[j] = d__[j] - d__[*i__] - midpt;
+        }
+        psi = 0.;
+        i__1 = *n - 2;
+        for (j = 1; j <= i__1; ++j) {
+            psi += z__[j] * z__[j] / delta[j];
+        }
+        c__ = rhoinv + psi;
+        w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*n];
+        if (w <= 0.) {
+            temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) +
+                   z__[*n] * z__[*n] / *rho;
+            if (c__ <= temp) {
+                tau = *rho;
+            } else {
+                del = d__[*n] - d__[*n - 1];
+                a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+                b = z__[*n] * z__[*n] * del;
+                if (a < 0.) {
+                    tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+                } else {
+                    tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+                }
+            }
+            dltlb = midpt;
+            dltub = *rho;
+        } else {
+            del = d__[*n] - d__[*n - 1];
+            a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+            b = z__[*n] * z__[*n] * del;
+            if (a < 0.) {
+                tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+            } else {
+                tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+            }
+            dltlb = 0.;
+            dltub = midpt;
+        }
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            delta[j] = d__[j] - d__[*i__] - tau;
+        }
+        dpsi = 0.;
+        psi = 0.;
+        erretm = 0.;
+        i__1 = ii;
+        for (j = 1; j <= i__1; ++j) {
+            temp = z__[j] / delta[j];
+            psi += z__[j] * temp;
+            dpsi += temp * temp;
+            erretm += psi;
+        }
+        erretm = abs(erretm);
+        temp = z__[*n] / delta[*n];
+        phi = z__[*n] * temp;
+        dphi = temp * temp;
+        erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
+        w = rhoinv + phi + psi;
+        if (abs(w) <= eps * erretm) {
+            *dlam = d__[*i__] + tau;
+            goto L250;
+        }
+        if (w <= 0.) {
+            dltlb = max(dltlb, tau);
+        } else {
+            dltub = min(dltub, tau);
+        }
+        ++niter;
+        c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+        a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
+        b = delta[*n - 1] * delta[*n] * w;
+        if (c__ < 0.) {
+            c__ = abs(c__);
+        }
+        if (c__ == 0.) {
+            eta = -w / (dpsi + dphi);
+        } else if (a >= 0.) {
+            eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+        } else {
+            eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+        }
+        if (w * eta > 0.) {
+            eta = -w / (dpsi + dphi);
+        }
+        temp = tau + eta;
+        if (temp > dltub || temp < dltlb) {
+            if (w < 0.) {
+                eta = (dltub - tau) / 2.;
+            } else {
+                eta = (dltlb - tau) / 2.;
+            }
+        }
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            delta[j] -= eta;
+        }
+        tau += eta;
+        dpsi = 0.;
+        psi = 0.;
+        erretm = 0.;
+        i__1 = ii;
+        for (j = 1; j <= i__1; ++j) {
+            temp = z__[j] / delta[j];
+            psi += z__[j] * temp;
+            dpsi += temp * temp;
+            erretm += psi;
+        }
+        erretm = abs(erretm);
+        temp = z__[*n] / delta[*n];
+        phi = z__[*n] * temp;
+        dphi = temp * temp;
+        erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
+        w = rhoinv + phi + psi;
+        iter = niter + 1;
+        for (niter = iter; niter <= 30; ++niter) {
+            if (abs(w) <= eps * erretm) {
+                *dlam = d__[*i__] + tau;
+                goto L250;
+            }
+            if (w <= 0.) {
+                dltlb = max(dltlb, tau);
+            } else {
+                dltub = min(dltub, tau);
+            }
+            c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+            a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
+            b = delta[*n - 1] * delta[*n] * w;
+            if (a >= 0.) {
+                eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+            } else {
+                eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+            }
+            if (w * eta > 0.) {
+                eta = -w / (dpsi + dphi);
+            }
+            temp = tau + eta;
+            if (temp > dltub || temp < dltlb) {
+                if (w < 0.) {
+                    eta = (dltub - tau) / 2.;
+                } else {
+                    eta = (dltlb - tau) / 2.;
+                }
+            }
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                delta[j] -= eta;
+            }
+            tau += eta;
+            dpsi = 0.;
+            psi = 0.;
+            erretm = 0.;
+            i__1 = ii;
+            for (j = 1; j <= i__1; ++j) {
+                temp = z__[j] / delta[j];
+                psi += z__[j] * temp;
+                dpsi += temp * temp;
+                erretm += psi;
+            }
+            erretm = abs(erretm);
+            temp = z__[*n] / delta[*n];
+            phi = z__[*n] * temp;
+            dphi = temp * temp;
+            erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
+            w = rhoinv + phi + psi;
+        }
+        *info = 1;
+        *dlam = d__[*i__] + tau;
+        goto L250;
+    } else {
+        niter = 1;
+        ip1 = *i__ + 1;
+        del = d__[ip1] - d__[*i__];
+        midpt = del / 2.;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            delta[j] = d__[j] - d__[*i__] - midpt;
+        }
+        psi = 0.;
+        i__1 = *i__ - 1;
+        for (j = 1; j <= i__1; ++j) {
+            psi += z__[j] * z__[j] / delta[j];
+        }
+        phi = 0.;
+        i__1 = *i__ + 2;
+        for (j = *n; j >= i__1; --j) {
+            phi += z__[j] * z__[j] / delta[j];
+        }
+        c__ = rhoinv + psi + phi;
+        w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1];
+        if (w > 0.) {
+            orgati = TRUE_;
+            a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
+            b = z__[*i__] * z__[*i__] * del;
+            if (a > 0.) {
+                tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+            } else {
+                tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+            }
+            dltlb = 0.;
+            dltub = midpt;
+        } else {
+            orgati = FALSE_;
+            a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
+            b = z__[ip1] * z__[ip1] * del;
+            if (a < 0.) {
+                tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(d__1))));
+            } else {
+                tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+            }
+            dltlb = -midpt;
+            dltub = 0.;
+        }
+        if (orgati) {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                delta[j] = d__[j] - d__[*i__] - tau;
+            }
+        } else {
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                delta[j] = d__[j] - d__[ip1] - tau;
+            }
+        }
+        if (orgati) {
+            ii = *i__;
+        } else {
+            ii = *i__ + 1;
+        }
+        iim1 = ii - 1;
+        iip1 = ii + 1;
+        dpsi = 0.;
+        psi = 0.;
+        erretm = 0.;
+        i__1 = iim1;
+        for (j = 1; j <= i__1; ++j) {
+            temp = z__[j] / delta[j];
+            psi += z__[j] * temp;
+            dpsi += temp * temp;
+            erretm += psi;
+        }
+        erretm = abs(erretm);
+        dphi = 0.;
+        phi = 0.;
+        i__1 = iip1;
+        for (j = *n; j >= i__1; --j) {
+            temp = z__[j] / delta[j];
+            phi += z__[j] * temp;
+            dphi += temp * temp;
+            erretm += phi;
+        }
+        w = rhoinv + phi + psi;
+        swtch3 = FALSE_;
+        if (orgati) {
+            if (w < 0.) {
+                swtch3 = TRUE_;
+            }
+        } else {
+            if (w > 0.) {
+                swtch3 = TRUE_;
+            }
+        }
+        if (ii == 1 || ii == *n) {
+            swtch3 = FALSE_;
+        }
+        temp = z__[ii] / delta[ii];
+        dw = dpsi + dphi + temp * temp;
+        temp = z__[ii] * temp;
+        w += temp;
+        erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
+        if (abs(w) <= eps * erretm) {
+            if (orgati) {
+                *dlam = d__[*i__] + tau;
+            } else {
+                *dlam = d__[ip1] + tau;
+            }
+            goto L250;
+        }
+        if (w <= 0.) {
+            dltlb = max(dltlb, tau);
+        } else {
+            dltub = min(dltub, tau);
+        }
+        ++niter;
+        if (!swtch3) {
+            if (orgati) {
+                d__1 = z__[*i__] / delta[*i__];
+                c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
+            } else {
+                d__1 = z__[ip1] / delta[ip1];
+                c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
+            }
+            a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
+            b = delta[*i__] * delta[ip1] * w;
+            if (c__ == 0.) {
+                if (a == 0.) {
+                    if (orgati) {
+                        a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
+                    } else {
+                        a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
+                    }
+                }
+                eta = b / a;
+            } else if (a <= 0.) {
+                eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+            } else {
+                eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+            }
+        } else {
+            temp = rhoinv + psi + phi;
+            if (orgati) {
+                temp1 = z__[iim1] / delta[iim1];
+                temp1 *= temp1;
+                c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
+                zz[0] = z__[iim1] * z__[iim1];
+                zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
+            } else {
+                temp1 = z__[iip1] / delta[iip1];
+                temp1 *= temp1;
+                c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
+                zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
+                zz[2] = z__[iip1] * z__[iip1];
+            }
+            zz[1] = z__[ii] * z__[ii];
+            dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
+            if (*info != 0) {
+                goto L250;
+            }
+        }
+        if (w * eta >= 0.) {
+            eta = -w / dw;
+        }
+        temp = tau + eta;
+        if (temp > dltub || temp < dltlb) {
+            if (w < 0.) {
+                eta = (dltub - tau) / 2.;
+            } else {
+                eta = (dltlb - tau) / 2.;
+            }
+        }
+        prew = w;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+            delta[j] -= eta;
+        }
+        dpsi = 0.;
+        psi = 0.;
+        erretm = 0.;
+        i__1 = iim1;
+        for (j = 1; j <= i__1; ++j) {
+            temp = z__[j] / delta[j];
+            psi += z__[j] * temp;
+            dpsi += temp * temp;
+            erretm += psi;
+        }
+        erretm = abs(erretm);
+        dphi = 0.;
+        phi = 0.;
+        i__1 = iip1;
+        for (j = *n; j >= i__1; --j) {
+            temp = z__[j] / delta[j];
+            phi += z__[j] * temp;
+            dphi += temp * temp;
+            erretm += phi;
+        }
+        temp = z__[ii] / delta[ii];
+        dw = dpsi + dphi + temp * temp;
+        temp = z__[ii] * temp;
+        w = rhoinv + phi + psi + temp;
+        erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
+                 (d__1 = tau + eta, abs(d__1)) * dw;
+        swtch = FALSE_;
+        if (orgati) {
+            if (-w > abs(prew) / 10.) {
+                swtch = TRUE_;
+            }
+        } else {
+            if (w > abs(prew) / 10.) {
+                swtch = TRUE_;
+            }
+        }
+        tau += eta;
+        iter = niter + 1;
+        for (niter = iter; niter <= 30; ++niter) {
+            if (abs(w) <= eps * erretm) {
+                if (orgati) {
+                    *dlam = d__[*i__] + tau;
+                } else {
+                    *dlam = d__[ip1] + tau;
+                }
+                goto L250;
+            }
+            if (w <= 0.) {
+                dltlb = max(dltlb, tau);
+            } else {
+                dltub = min(dltub, tau);
+            }
+            if (!swtch3) {
+                if (!swtch) {
+                    if (orgati) {
+                        d__1 = z__[*i__] / delta[*i__];
+                        c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
+                    } else {
+                        d__1 = z__[ip1] / delta[ip1];
+                        c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
+                    }
+                } else {
+                    temp = z__[ii] / delta[ii];
+                    if (orgati) {
+                        dpsi += temp * temp;
+                    } else {
+                        dphi += temp * temp;
+                    }
+                    c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
+                }
+                a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
+                b = delta[*i__] * delta[ip1] * w;
+                if (c__ == 0.) {
+                    if (a == 0.) {
+                        if (!swtch) {
+                            if (orgati) {
+                                a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
+                            } else {
+                                a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
+                            }
+                        } else {
+                            a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] * delta[ip1] * dphi;
+                        }
+                    }
+                    eta = b / a;
+                } else if (a <= 0.) {
+                    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+                } else {
+                    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+                }
+            } else {
+                temp = rhoinv + psi + phi;
+                if (swtch) {
+                    c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
+                    zz[0] = delta[iim1] * delta[iim1] * dpsi;
+                    zz[2] = delta[iip1] * delta[iip1] * dphi;
+                } else {
+                    if (orgati) {
+                        temp1 = z__[iim1] / delta[iim1];
+                        temp1 *= temp1;
+                        c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
+                        zz[0] = z__[iim1] * z__[iim1];
+                        zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
+                    } else {
+                        temp1 = z__[iip1] / delta[iip1];
+                        temp1 *= temp1;
+                        c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
+                        zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
+                        zz[2] = z__[iip1] * z__[iip1];
+                    }
+                }
+                dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
+                if (*info != 0) {
+                    goto L250;
+                }
+            }
+            if (w * eta >= 0.) {
+                eta = -w / dw;
+            }
+            temp = tau + eta;
+            if (temp > dltub || temp < dltlb) {
+                if (w < 0.) {
+                    eta = (dltub - tau) / 2.;
+                } else {
+                    eta = (dltlb - tau) / 2.;
+                }
+            }
+            i__1 = *n;
+            for (j = 1; j <= i__1; ++j) {
+                delta[j] -= eta;
+            }
+            tau += eta;
+            prew = w;
+            dpsi = 0.;
+            psi = 0.;
+            erretm = 0.;
+            i__1 = iim1;
+            for (j = 1; j <= i__1; ++j) {
+                temp = z__[j] / delta[j];
+                psi += z__[j] * temp;
+                dpsi += temp * temp;
+                erretm += psi;
+            }
+            erretm = abs(erretm);
+            dphi = 0.;
+            phi = 0.;
+            i__1 = iip1;
+            for (j = *n; j >= i__1; --j) {
+                temp = z__[j] / delta[j];
+                phi += z__[j] * temp;
+                dphi += temp * temp;
+                erretm += phi;
+            }
+            temp = z__[ii] / delta[ii];
+            dw = dpsi + dphi + temp * temp;
+            temp = z__[ii] * temp;
+            w = rhoinv + phi + psi + temp;
+            erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
+            if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
+                swtch = !swtch;
+            }
+        }
+        *info = 1;
+        if (orgati) {
+            *dlam = d__[*i__] + tau;
+        } else {
+            *dlam = d__[ip1] + tau;
+        }
+    }
+L250:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed4.f
+++ b/lib/linalg/dlaed4.f
@ -1,917 +0,0 @@
-*> \brief \b DLAED4 used by DSTEDC. Finds a single root of the secular equation.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED4 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            I, INFO, N
-*       DOUBLE PRECISION   DLAM, RHO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> This subroutine computes the I-th updated eigenvalue of a symmetric
-*> rank-one modification to a diagonal matrix whose elements are
-*> given in the array d, and that
-*>
-*>            D(i) < D(j)  for  i < j
-*>
-*> and that RHO > 0.  This is arranged by the calling routine, and is
-*> no loss in generality.  The rank-one modified system is thus
-*>
-*>            diag( D )  +  RHO * Z * Z_transpose.
-*>
-*> where we assume the Euclidean norm of Z is 1.
-*>
-*> The method consists of approximating the rational functions in the
-*> secular equation by simpler interpolating rational functions.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         The length of all arrays.
-*> \endverbatim
-*>
-*> \param[in] I
-*> \verbatim
-*>          I is INTEGER
-*>         The index of the eigenvalue to be computed.  1 <= I <= N.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (N)
-*>         The original eigenvalues.  It is assumed that they are in
-*>         order, D(I) < D(J)  for I < J.
-*> \endverbatim
-*>
-*> \param[in] Z
-*> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension (N)
-*>         The components of the updating vector.
-*> \endverbatim
-*>
-*> \param[out] DELTA
-*> \verbatim
-*>          DELTA is DOUBLE PRECISION array, dimension (N)
-*>         If N > 2, DELTA contains (D(j) - lambda_I) in its  j-th
-*>         component.  If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
-*>         for detail. The vector DELTA contains the information necessary
-*>         to construct the eigenvectors by DLAED3 and DLAED9.
-*> \endverbatim
-*>
-*> \param[in] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>         The scalar in the symmetric updating formula.
-*> \endverbatim
-*>
-*> \param[out] DLAM
-*> \verbatim
-*>          DLAM is DOUBLE PRECISION
-*>         The computed lambda_I, the I-th updated eigenvalue.
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>         = 0:  successful exit
-*>         > 0:  if INFO = 1, the updating process failed.
-*> \endverbatim
-*
-*> \par Internal Parameters:
-*  =========================
-*>
-*> \verbatim
-*>  Logical variable ORGATI (origin-at-i?) is used for distinguishing
-*>  whether D(i) or D(i+1) is treated as the origin.
-*>
-*>            ORGATI = .true.    origin at i
-*>            ORGATI = .false.   origin at i+1
-*>
-*>   Logical variable SWTCH3 (switch-for-3-poles?) is for noting
-*>   if we are working with THREE poles!
-*>
-*>   MAXIT is the maximum number of iterations allowed for each
-*>   eigenvalue.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*>     Ren-Cang Li, Computer Science Division, University of California
-*>     at Berkeley, USA
-*>
-*  =====================================================================
-      SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            I, INFO, N
-      DOUBLE PRECISION   DLAM, RHO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      INTEGER            MAXIT
-      PARAMETER          ( MAXIT = 30 )
-      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
-     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0,
-     $                   TEN = 10.0D0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            ORGATI, SWTCH, SWTCH3
-      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
-      DOUBLE PRECISION   A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
-     $                   EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
-     $                   RHOINV, TAU, TEMP, TEMP1, W
-*     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   ZZ( 3 )
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH
-      EXTERNAL           DLAMCH
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLAED5, DLAED6
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-*     Since this routine is called in an inner loop, we do no argument
-*     checking.
-*
-*     Quick return for N=1 and 2.
-*
-      INFO = 0
-      IF( N.EQ.1 ) THEN
-*
-*         Presumably, I=1 upon entry
-*
-         DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
-         DELTA( 1 ) = ONE
-         RETURN
-      END IF
-      IF( N.EQ.2 ) THEN
-         CALL DLAED5( I, D, Z, DELTA, RHO, DLAM )
-         RETURN
-      END IF
-*
-*     Compute machine epsilon
-*
-      EPS = DLAMCH( 'Epsilon' )
-      RHOINV = ONE / RHO
-*
-*     The case I = N
-*
-      IF( I.EQ.N ) THEN
-*
-*        Initialize some basic variables
-*
-         II = N - 1
-         NITER = 1
-*
-*        Calculate initial guess
-*
-         MIDPT = RHO / TWO
-*
-*        If ||Z||_2 is not one, then TEMP should be set to
-*        RHO * ||Z||_2^2 / TWO
-*
-         DO 10 J = 1, N
-            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
-   10    CONTINUE
-*
-         PSI = ZERO
-         DO 20 J = 1, N - 2
-            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
-   20    CONTINUE
-*
-         C = RHOINV + PSI
-         W = C + Z( II )*Z( II ) / DELTA( II ) +
-     $       Z( N )*Z( N ) / DELTA( N )
-*
-         IF( W.LE.ZERO ) THEN
-            TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
-     $             Z( N )*Z( N ) / RHO
-            IF( C.LE.TEMP ) THEN
-               TAU = RHO
-            ELSE
-               DEL = D( N ) - D( N-1 )
-               A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
-               B = Z( N )*Z( N )*DEL
-               IF( A.LT.ZERO ) THEN
-                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
-               ELSE
-                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
-               END IF
-            END IF
-*
-*           It can be proved that
-*               D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
-*
-            DLTLB = MIDPT
-            DLTUB = RHO
-         ELSE
-            DEL = D( N ) - D( N-1 )
-            A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
-            B = Z( N )*Z( N )*DEL
-            IF( A.LT.ZERO ) THEN
-               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
-            ELSE
-               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
-            END IF
-*
-*           It can be proved that
-*               D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
-*
-            DLTLB = ZERO
-            DLTUB = MIDPT
-         END IF
-*
-         DO 30 J = 1, N
-            DELTA( J ) = ( D( J )-D( I ) ) - TAU
-   30    CONTINUE
-*
-*        Evaluate PSI and the derivative DPSI
-*
-         DPSI = ZERO
-         PSI = ZERO
-         ERRETM = ZERO
-         DO 40 J = 1, II
-            TEMP = Z( J ) / DELTA( J )
-            PSI = PSI + Z( J )*TEMP
-            DPSI = DPSI + TEMP*TEMP
-            ERRETM = ERRETM + PSI
-   40    CONTINUE
-         ERRETM = ABS( ERRETM )
-*
-*        Evaluate PHI and the derivative DPHI
-*
-         TEMP = Z( N ) / DELTA( N )
-         PHI = Z( N )*TEMP
-         DPHI = TEMP*TEMP
-         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
-     $            ABS( TAU )*( DPSI+DPHI )
-*
-         W = RHOINV + PHI + PSI
-*
-*        Test for convergence
-*
-         IF( ABS( W ).LE.EPS*ERRETM ) THEN
-            DLAM = D( I ) + TAU
-            GO TO 250
-         END IF
-*
-         IF( W.LE.ZERO ) THEN
-            DLTLB = MAX( DLTLB, TAU )
-         ELSE
-            DLTUB = MIN( DLTUB, TAU )
-         END IF
-*
-*        Calculate the new step
-*
-         NITER = NITER + 1
-         C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
-         A = ( DELTA( N-1 )+DELTA( N ) )*W -
-     $       DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
-         B = DELTA( N-1 )*DELTA( N )*W
-         IF( C.LT.ZERO )
-     $      C = ABS( C )
-         IF( C.EQ.ZERO ) THEN
-*           ETA = B/A
-*           ETA = RHO - TAU
-*           ETA = DLTUB - TAU
-*
-*           Update proposed by Li, Ren-Cang:
-            ETA = -W / ( DPSI+DPHI )
-         ELSE IF( A.GE.ZERO ) THEN
-            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-         ELSE
-            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
-         END IF
-*
-*        Note, eta should be positive if w is negative, and
-*        eta should be negative otherwise. However,
-*        if for some reason caused by roundoff, eta*w > 0,
-*        we simply use one Newton step instead. This way
-*        will guarantee eta*w < 0.
-*
-         IF( W*ETA.GT.ZERO )
-     $      ETA = -W / ( DPSI+DPHI )
-         TEMP = TAU + ETA
-         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
-            IF( W.LT.ZERO ) THEN
-               ETA = ( DLTUB-TAU ) / TWO
-            ELSE
-               ETA = ( DLTLB-TAU ) / TWO
-            END IF
-         END IF
-         DO 50 J = 1, N
-            DELTA( J ) = DELTA( J ) - ETA
-   50    CONTINUE
-*
-         TAU = TAU + ETA
-*
-*        Evaluate PSI and the derivative DPSI
-*
-         DPSI = ZERO
-         PSI = ZERO
-         ERRETM = ZERO
-         DO 60 J = 1, II
-            TEMP = Z( J ) / DELTA( J )
-            PSI = PSI + Z( J )*TEMP
-            DPSI = DPSI + TEMP*TEMP
-            ERRETM = ERRETM + PSI
-   60    CONTINUE
-         ERRETM = ABS( ERRETM )
-*
-*        Evaluate PHI and the derivative DPHI
-*
-         TEMP = Z( N ) / DELTA( N )
-         PHI = Z( N )*TEMP
-         DPHI = TEMP*TEMP
-         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
-     $            ABS( TAU )*( DPSI+DPHI )
-*
-         W = RHOINV + PHI + PSI
-*
-*        Main loop to update the values of the array   DELTA
-*
-         ITER = NITER + 1
-*
-         DO 90 NITER = ITER, MAXIT
-*
-*           Test for convergence
-*
-            IF( ABS( W ).LE.EPS*ERRETM ) THEN
-               DLAM = D( I ) + TAU
-               GO TO 250
-            END IF
-*
-            IF( W.LE.ZERO ) THEN
-               DLTLB = MAX( DLTLB, TAU )
-            ELSE
-               DLTUB = MIN( DLTUB, TAU )
-            END IF
-*
-*           Calculate the new step
-*
-            C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
-            A = ( DELTA( N-1 )+DELTA( N ) )*W -
-     $          DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
-            B = DELTA( N-1 )*DELTA( N )*W
-            IF( A.GE.ZERO ) THEN
-               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-            ELSE
-               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
-            END IF
-*
-*           Note, eta should be positive if w is negative, and
-*           eta should be negative otherwise. However,
-*           if for some reason caused by roundoff, eta*w > 0,
-*           we simply use one Newton step instead. This way
-*           will guarantee eta*w < 0.
-*
-            IF( W*ETA.GT.ZERO )
-     $         ETA = -W / ( DPSI+DPHI )
-            TEMP = TAU + ETA
-            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
-               IF( W.LT.ZERO ) THEN
-                  ETA = ( DLTUB-TAU ) / TWO
-               ELSE
-                  ETA = ( DLTLB-TAU ) / TWO
-               END IF
-            END IF
-            DO 70 J = 1, N
-               DELTA( J ) = DELTA( J ) - ETA
-   70       CONTINUE
-*
-            TAU = TAU + ETA
-*
-*           Evaluate PSI and the derivative DPSI
-*
-            DPSI = ZERO
-            PSI = ZERO
-            ERRETM = ZERO
-            DO 80 J = 1, II
-               TEMP = Z( J ) / DELTA( J )
-               PSI = PSI + Z( J )*TEMP
-               DPSI = DPSI + TEMP*TEMP
-               ERRETM = ERRETM + PSI
-   80       CONTINUE
-            ERRETM = ABS( ERRETM )
-*
-*           Evaluate PHI and the derivative DPHI
-*
-            TEMP = Z( N ) / DELTA( N )
-            PHI = Z( N )*TEMP
-            DPHI = TEMP*TEMP
-            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
-     $               ABS( TAU )*( DPSI+DPHI )
-*
-            W = RHOINV + PHI + PSI
-   90    CONTINUE
-*
-*        Return with INFO = 1, NITER = MAXIT and not converged
-*
-         INFO = 1
-         DLAM = D( I ) + TAU
-         GO TO 250
-*
-*        End for the case I = N
-*
-      ELSE
-*
-*        The case for I < N
-*
-         NITER = 1
-         IP1 = I + 1
-*
-*        Calculate initial guess
-*
-         DEL = D( IP1 ) - D( I )
-         MIDPT = DEL / TWO
-         DO 100 J = 1, N
-            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
-  100    CONTINUE
-*
-         PSI = ZERO
-         DO 110 J = 1, I - 1
-            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
-  110    CONTINUE
-*
-         PHI = ZERO
-         DO 120 J = N, I + 2, -1
-            PHI = PHI + Z( J )*Z( J ) / DELTA( J )
-  120    CONTINUE
-         C = RHOINV + PSI + PHI
-         W = C + Z( I )*Z( I ) / DELTA( I ) +
-     $       Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
-*
-         IF( W.GT.ZERO ) THEN
-*
-*           d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
-*
-*           We choose d(i) as origin.
-*
-            ORGATI = .TRUE.
-            A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
-            B = Z( I )*Z( I )*DEL
-            IF( A.GT.ZERO ) THEN
-               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
-            ELSE
-               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-            END IF
-            DLTLB = ZERO
-            DLTUB = MIDPT
-         ELSE
-*
-*           (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
-*
-*           We choose d(i+1) as origin.
-*
-            ORGATI = .FALSE.
-            A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
-            B = Z( IP1 )*Z( IP1 )*DEL
-            IF( A.LT.ZERO ) THEN
-               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
-            ELSE
-               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
-            END IF
-            DLTLB = -MIDPT
-            DLTUB = ZERO
-         END IF
-*
-         IF( ORGATI ) THEN
-            DO 130 J = 1, N
-               DELTA( J ) = ( D( J )-D( I ) ) - TAU
-  130       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
-  140       CONTINUE
-         END IF
-         IF( ORGATI ) THEN
-            II = I
-         ELSE
-            II = I + 1
-         END IF
-         IIM1 = II - 1
-         IIP1 = II + 1
-*
-*        Evaluate PSI and the derivative DPSI
-*
-         DPSI = ZERO
-         PSI = ZERO
-         ERRETM = ZERO
-         DO 150 J = 1, IIM1
-            TEMP = Z( J ) / DELTA( J )
-            PSI = PSI + Z( J )*TEMP
-            DPSI = DPSI + TEMP*TEMP
-            ERRETM = ERRETM + PSI
-  150    CONTINUE
-         ERRETM = ABS( ERRETM )
-*
-*        Evaluate PHI and the derivative DPHI
-*
-         DPHI = ZERO
-         PHI = ZERO
-         DO 160 J = N, IIP1, -1
-            TEMP = Z( J ) / DELTA( J )
-            PHI = PHI + Z( J )*TEMP
-            DPHI = DPHI + TEMP*TEMP
-            ERRETM = ERRETM + PHI
-  160    CONTINUE
-*
-         W = RHOINV + PHI + PSI
-*
-*        W is the value of the secular function with
-*        its ii-th element removed.
-*
-         SWTCH3 = .FALSE.
-         IF( ORGATI ) THEN
-            IF( W.LT.ZERO )
-     $         SWTCH3 = .TRUE.
-         ELSE
-            IF( W.GT.ZERO )
-     $         SWTCH3 = .TRUE.
-         END IF
-         IF( II.EQ.1 .OR. II.EQ.N )
-     $      SWTCH3 = .FALSE.
-*
-         TEMP = Z( II ) / DELTA( II )
-         DW = DPSI + DPHI + TEMP*TEMP
-         TEMP = Z( II )*TEMP
-         W = W + TEMP
-         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
-     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
-*
-*        Test for convergence
-*
-         IF( ABS( W ).LE.EPS*ERRETM ) THEN
-            IF( ORGATI ) THEN
-               DLAM = D( I ) + TAU
-            ELSE
-               DLAM = D( IP1 ) + TAU
-            END IF
-            GO TO 250
-         END IF
-*
-         IF( W.LE.ZERO ) THEN
-            DLTLB = MAX( DLTLB, TAU )
-         ELSE
-            DLTUB = MIN( DLTUB, TAU )
-         END IF
-*
-*        Calculate the new step
-*
-         NITER = NITER + 1
-         IF( .NOT.SWTCH3 ) THEN
-            IF( ORGATI ) THEN
-               C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
-     $             ( Z( I ) / DELTA( I ) )**2
-            ELSE
-               C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
-     $             ( Z( IP1 ) / DELTA( IP1 ) )**2
-            END IF
-            A = ( DELTA( I )+DELTA( IP1 ) )*W -
-     $          DELTA( I )*DELTA( IP1 )*DW
-            B = DELTA( I )*DELTA( IP1 )*W
-            IF( C.EQ.ZERO ) THEN
-               IF( A.EQ.ZERO ) THEN
-                  IF( ORGATI ) THEN
-                     A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
-     $                   ( DPSI+DPHI )
-                  ELSE
-                     A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
-     $                   ( DPSI+DPHI )
-                  END IF
-               END IF
-               ETA = B / A
-            ELSE IF( A.LE.ZERO ) THEN
-               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-            ELSE
-               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
-            END IF
-         ELSE
-*
-*           Interpolation using THREE most relevant poles
-*
-            TEMP = RHOINV + PSI + PHI
-            IF( ORGATI ) THEN
-               TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
-               TEMP1 = TEMP1*TEMP1
-               C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
-     $             ( D( IIM1 )-D( IIP1 ) )*TEMP1
-               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
-               ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
-     $                   ( ( DPSI-TEMP1 )+DPHI )
-            ELSE
-               TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
-               TEMP1 = TEMP1*TEMP1
-               C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
-     $             ( D( IIP1 )-D( IIM1 ) )*TEMP1
-               ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
-     $                   ( DPSI+( DPHI-TEMP1 ) )
-               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
-            END IF
-            ZZ( 2 ) = Z( II )*Z( II )
-            CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
-     $                   INFO )
-            IF( INFO.NE.0 )
-     $         GO TO 250
-         END IF
-*
-*        Note, eta should be positive if w is negative, and
-*        eta should be negative otherwise. However,
-*        if for some reason caused by roundoff, eta*w > 0,
-*        we simply use one Newton step instead. This way
-*        will guarantee eta*w < 0.
-*
-         IF( W*ETA.GE.ZERO )
-     $      ETA = -W / DW
-         TEMP = TAU + ETA
-         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
-            IF( W.LT.ZERO ) THEN
-               ETA = ( DLTUB-TAU ) / TWO
-            ELSE
-               ETA = ( DLTLB-TAU ) / TWO
-            END IF
-         END IF
-*
-         PREW = W
-*
-         DO 180 J = 1, N
-            DELTA( J ) = DELTA( J ) - ETA
-  180    CONTINUE
-*
-*        Evaluate PSI and the derivative DPSI
-*
-         DPSI = ZERO
-         PSI = ZERO
-         ERRETM = ZERO
-         DO 190 J = 1, IIM1
-            TEMP = Z( J ) / DELTA( J )
-            PSI = PSI + Z( J )*TEMP
-            DPSI = DPSI + TEMP*TEMP
-            ERRETM = ERRETM + PSI
-  190    CONTINUE
-         ERRETM = ABS( ERRETM )
-*
-*        Evaluate PHI and the derivative DPHI
-*
-         DPHI = ZERO
-         PHI = ZERO
-         DO 200 J = N, IIP1, -1
-            TEMP = Z( J ) / DELTA( J )
-            PHI = PHI + Z( J )*TEMP
-            DPHI = DPHI + TEMP*TEMP
-            ERRETM = ERRETM + PHI
-  200    CONTINUE
-*
-         TEMP = Z( II ) / DELTA( II )
-         DW = DPSI + DPHI + TEMP*TEMP
-         TEMP = Z( II )*TEMP
-         W = RHOINV + PHI + PSI + TEMP
-         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
-     $            THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
-*
-         SWTCH = .FALSE.
-         IF( ORGATI ) THEN
-            IF( -W.GT.ABS( PREW ) / TEN )
-     $         SWTCH = .TRUE.
-         ELSE
-            IF( W.GT.ABS( PREW ) / TEN )
-     $         SWTCH = .TRUE.
-         END IF
-*
-         TAU = TAU + ETA
-*
-*        Main loop to update the values of the array   DELTA
-*
-         ITER = NITER + 1
-*
-         DO 240 NITER = ITER, MAXIT
-*
-*           Test for convergence
-*
-            IF( ABS( W ).LE.EPS*ERRETM ) THEN
-               IF( ORGATI ) THEN
-                  DLAM = D( I ) + TAU
-               ELSE
-                  DLAM = D( IP1 ) + TAU
-               END IF
-               GO TO 250
-            END IF
-*
-            IF( W.LE.ZERO ) THEN
-               DLTLB = MAX( DLTLB, TAU )
-            ELSE
-               DLTUB = MIN( DLTUB, TAU )
-            END IF
-*
-*           Calculate the new step
-*
-            IF( .NOT.SWTCH3 ) THEN
-               IF( .NOT.SWTCH ) THEN
-                  IF( ORGATI ) THEN
-                     C = W - DELTA( IP1 )*DW -
-     $                   ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
-                  ELSE
-                     C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
-     $                   ( Z( IP1 ) / DELTA( IP1 ) )**2
-                  END IF
-               ELSE
-                  TEMP = Z( II ) / DELTA( II )
-                  IF( ORGATI ) THEN
-                     DPSI = DPSI + TEMP*TEMP
-                  ELSE
-                     DPHI = DPHI + TEMP*TEMP
-                  END IF
-                  C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
-               END IF
-               A = ( DELTA( I )+DELTA( IP1 ) )*W -
-     $             DELTA( I )*DELTA( IP1 )*DW
-               B = DELTA( I )*DELTA( IP1 )*W
-               IF( C.EQ.ZERO ) THEN
-                  IF( A.EQ.ZERO ) THEN
-                     IF( .NOT.SWTCH ) THEN
-                        IF( ORGATI ) THEN
-                           A = Z( I )*Z( I ) + DELTA( IP1 )*
-     $                         DELTA( IP1 )*( DPSI+DPHI )
-                        ELSE
-                           A = Z( IP1 )*Z( IP1 ) +
-     $                         DELTA( I )*DELTA( I )*( DPSI+DPHI )
-                        END IF
-                     ELSE
-                        A = DELTA( I )*DELTA( I )*DPSI +
-     $                      DELTA( IP1 )*DELTA( IP1 )*DPHI
-                     END IF
-                  END IF
-                  ETA = B / A
-               ELSE IF( A.LE.ZERO ) THEN
-                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-               ELSE
-                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
-               END IF
-            ELSE
-*
-*              Interpolation using THREE most relevant poles
-*
-               TEMP = RHOINV + PSI + PHI
-               IF( SWTCH ) THEN
-                  C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
-                  ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
-                  ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
-               ELSE
-                  IF( ORGATI ) THEN
-                     TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
-                     TEMP1 = TEMP1*TEMP1
-                     C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
-     $                   ( D( IIM1 )-D( IIP1 ) )*TEMP1
-                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
-                     ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
-     $                         ( ( DPSI-TEMP1 )+DPHI )
-                  ELSE
-                     TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
-                     TEMP1 = TEMP1*TEMP1
-                     C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
-     $                   ( D( IIP1 )-D( IIM1 ) )*TEMP1
-                     ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
-     $                         ( DPSI+( DPHI-TEMP1 ) )
-                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
-                  END IF
-               END IF
-               CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
-     $                      INFO )
-               IF( INFO.NE.0 )
-     $            GO TO 250
-            END IF
-*
-*           Note, eta should be positive if w is negative, and
-*           eta should be negative otherwise. However,
-*           if for some reason caused by roundoff, eta*w > 0,
-*           we simply use one Newton step instead. This way
-*           will guarantee eta*w < 0.
-*
-            IF( W*ETA.GE.ZERO )
-     $         ETA = -W / DW
-            TEMP = TAU + ETA
-            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
-               IF( W.LT.ZERO ) THEN
-                  ETA = ( DLTUB-TAU ) / TWO
-               ELSE
-                  ETA = ( DLTLB-TAU ) / TWO
-               END IF
-            END IF
-*
-            DO 210 J = 1, N
-               DELTA( J ) = DELTA( J ) - ETA
-  210       CONTINUE
-*
-            TAU = TAU + ETA
-            PREW = W
-*
-*           Evaluate PSI and the derivative DPSI
-*
-            DPSI = ZERO
-            PSI = ZERO
-            ERRETM = ZERO
-            DO 220 J = 1, IIM1
-               TEMP = Z( J ) / DELTA( J )
-               PSI = PSI + Z( J )*TEMP
-               DPSI = DPSI + TEMP*TEMP
-               ERRETM = ERRETM + PSI
-  220       CONTINUE
-            ERRETM = ABS( ERRETM )
-*
-*           Evaluate PHI and the derivative DPHI
-*
-            DPHI = ZERO
-            PHI = ZERO
-            DO 230 J = N, IIP1, -1
-               TEMP = Z( J ) / DELTA( J )
-               PHI = PHI + Z( J )*TEMP
-               DPHI = DPHI + TEMP*TEMP
-               ERRETM = ERRETM + PHI
-  230       CONTINUE
-*
-            TEMP = Z( II ) / DELTA( II )
-            DW = DPSI + DPHI + TEMP*TEMP
-            TEMP = Z( II )*TEMP
-            W = RHOINV + PHI + PSI + TEMP
-            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
-     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
-            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
-     $         SWTCH = .NOT.SWTCH
-*
-  240    CONTINUE
-*
-*        Return with INFO = 1, NITER = MAXIT and not converged
-*
-         INFO = 1
-         IF( ORGATI ) THEN
-            DLAM = D( I ) + TAU
-         ELSE
-            DLAM = D( IP1 ) + TAU
-         END IF
-*
-      END IF
-*
-  250 CONTINUE
-*
-      RETURN
-*
-*     End of DLAED4
-*
-      END
--- a/lib/linalg/dlaed5.cpp
+++ b/lib/linalg/dlaed5.cpp
@ -0,0 +1,58 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho,
+            doublereal *dlam)
+{
+    doublereal d__1;
+    double sqrt(doublereal);
+    doublereal b, c__, w, del, tau, temp;
+    --delta;
+    --z__;
+    --d__;
+    del = d__[2] - d__[1];
+    if (*i__ == 1) {
+        w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.;
+        if (w > 0.) {
+            b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+            c__ = *rho * z__[1] * z__[1] * del;
+            tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
+            *dlam = d__[1] + tau;
+            delta[1] = -z__[1] / tau;
+            delta[2] = z__[2] / (del - tau);
+        } else {
+            b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+            c__ = *rho * z__[2] * z__[2] * del;
+            if (b > 0.) {
+                tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
+            } else {
+                tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
+            }
+            *dlam = d__[2] + tau;
+            delta[1] = -z__[1] / (del + tau);
+            delta[2] = -z__[2] / tau;
+        }
+        temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+        delta[1] /= temp;
+        delta[2] /= temp;
+    } else {
+        b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+        c__ = *rho * z__[2] * z__[2] * del;
+        if (b > 0.) {
+            tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
+        } else {
+            tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
+        }
+        *dlam = d__[2] + tau;
+        delta[1] = -z__[1] / (del + tau);
+        delta[2] = -z__[2] / tau;
+        temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+        delta[1] /= temp;
+        delta[2] /= temp;
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed5.f
+++ b/lib/linalg/dlaed5.f
@ -1,186 +0,0 @@
-*> \brief \b DLAED5 used by DSTEDC. Solves the 2-by-2 secular equation.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED5 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            I
-*       DOUBLE PRECISION   DLAM, RHO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> This subroutine computes the I-th eigenvalue of a symmetric rank-one
-*> modification of a 2-by-2 diagonal matrix
-*>
-*>            diag( D )  +  RHO * Z * transpose(Z) .
-*>
-*> The diagonal elements in the array D are assumed to satisfy
-*>
-*>            D(i) < D(j)  for  i < j .
-*>
-*> We also assume RHO > 0 and that the Euclidean norm of the vector
-*> Z is one.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] I
-*> \verbatim
-*>          I is INTEGER
-*>         The index of the eigenvalue to be computed.  I = 1 or I = 2.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (2)
-*>         The original eigenvalues.  We assume D(1) < D(2).
-*> \endverbatim
-*>
-*> \param[in] Z
-*> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension (2)
-*>         The components of the updating vector.
-*> \endverbatim
-*>
-*> \param[out] DELTA
-*> \verbatim
-*>          DELTA is DOUBLE PRECISION array, dimension (2)
-*>         The vector DELTA contains the information necessary
-*>         to construct the eigenvectors.
-*> \endverbatim
-*>
-*> \param[in] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>         The scalar in the symmetric updating formula.
-*> \endverbatim
-*>
-*> \param[out] DLAM
-*> \verbatim
-*>          DLAM is DOUBLE PRECISION
-*>         The computed lambda_I, the I-th updated eigenvalue.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Contributors:
-*  ==================
-*>
-*>     Ren-Cang Li, Computer Science Division, University of California
-*>     at Berkeley, USA
-*>
-*  =====================================================================
-      SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      INTEGER            I
-      DOUBLE PRECISION   DLAM, RHO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE, TWO, FOUR
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
-     $                   FOUR = 4.0D0 )
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION   B, C, DEL, TAU, TEMP, W
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      DEL = D( 2 ) - D( 1 )
-      IF( I.EQ.1 ) THEN
-         W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
-         IF( W.GT.ZERO ) THEN
-            B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
-            C = RHO*Z( 1 )*Z( 1 )*DEL
-*
-*           B > ZERO, always
-*
-            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
-            DLAM = D( 1 ) + TAU
-            DELTA( 1 ) = -Z( 1 ) / TAU
-            DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
-         ELSE
-            B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
-            C = RHO*Z( 2 )*Z( 2 )*DEL
-            IF( B.GT.ZERO ) THEN
-               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
-            ELSE
-               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
-            END IF
-            DLAM = D( 2 ) + TAU
-            DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
-            DELTA( 2 ) = -Z( 2 ) / TAU
-         END IF
-         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
-         DELTA( 1 ) = DELTA( 1 ) / TEMP
-         DELTA( 2 ) = DELTA( 2 ) / TEMP
-      ELSE
-*
-*     Now I=2
-*
-         B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
-         C = RHO*Z( 2 )*Z( 2 )*DEL
-         IF( B.GT.ZERO ) THEN
-            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
-         ELSE
-            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
-         END IF
-         DLAM = D( 2 ) + TAU
-         DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
-         DELTA( 2 ) = -Z( 2 ) / TAU
-         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
-         DELTA( 1 ) = DELTA( 1 ) / TEMP
-         DELTA( 2 ) = DELTA( 2 ) / TEMP
-      END IF
-      RETURN
-*
-*     End of DLAED5
-*
-      END
--- a/lib/linalg/dlaed6.cpp
+++ b/lib/linalg/dlaed6.cpp
@ -0,0 +1,209 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+int dlaed6_(integer *kniter, logical *orgati, doublereal *rho, doublereal *d__, doublereal *z__,
+            doublereal *finit, doublereal *tau, integer *info)
+{
+    integer i__1;
+    doublereal d__1, d__2, d__3, d__4;
+    double sqrt(doublereal), log(doublereal), pow_lmp_di(doublereal *, integer *);
+    doublereal a, b, c__, f;
+    integer i__;
+    doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
+    integer iter;
+    doublereal temp, temp1, temp2, temp3, temp4;
+    logical scale;
+    integer niter;
+    doublereal small1, small2, sminv1, sminv2;
+    extern doublereal dlamch_(char *, ftnlen);
+    doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
+    --z__;
+    --d__;
+    *info = 0;
+    if (*orgati) {
+        lbd = d__[2];
+        ubd = d__[3];
+    } else {
+        lbd = d__[1];
+        ubd = d__[2];
+    }
+    if (*finit < 0.) {
+        lbd = 0.;
+    } else {
+        ubd = 0.;
+    }
+    niter = 1;
+    *tau = 0.;
+    if (*kniter == 2) {
+        if (*orgati) {
+            temp = (d__[3] - d__[2]) / 2.;
+            c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
+            a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
+            b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
+        } else {
+            temp = (d__[1] - d__[2]) / 2.;
+            c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
+            a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
+            b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
+        }
+        d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
+        temp = max(d__1, d__2);
+        a /= temp;
+        b /= temp;
+        c__ /= temp;
+        if (c__ == 0.) {
+            *tau = b / a;
+        } else if (a <= 0.) {
+            *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+        } else {
+            *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+        }
+        if (*tau < lbd || *tau > ubd) {
+            *tau = (lbd + ubd) / 2.;
+        }
+        if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
+            *tau = 0.;
+        } else {
+            temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) +
+                   *tau * z__[2] / (d__[2] * (d__[2] - *tau)) +
+                   *tau * z__[3] / (d__[3] * (d__[3] - *tau));
+            if (temp <= 0.) {
+                lbd = *tau;
+            } else {
+                ubd = *tau;
+            }
+            if (abs(*finit) <= abs(temp)) {
+                *tau = 0.;
+            }
+        }
+    }
+    eps = dlamch_((char *)"Epsilon", (ftnlen)7);
+    base = dlamch_((char *)"Base", (ftnlen)4);
+    i__1 = (integer)(log(dlamch_((char *)"SafMin", (ftnlen)6)) / log(base) / 3.);
+    small1 = pow_lmp_di(&base, &i__1);
+    sminv1 = 1. / small1;
+    small2 = small1 * small1;
+    sminv2 = sminv1 * sminv1;
+    if (*orgati) {
+        d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *tau, abs(d__2));
+        temp = min(d__3, d__4);
+    } else {
+        d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *tau, abs(d__2));
+        temp = min(d__3, d__4);
+    }
+    scale = FALSE_;
+    if (temp <= small1) {
+        scale = TRUE_;
+        if (temp <= small2) {
+            sclfac = sminv2;
+            sclinv = small2;
+        } else {
+            sclfac = sminv1;
+            sclinv = small1;
+        }
+        for (i__ = 1; i__ <= 3; ++i__) {
+            dscale[i__ - 1] = d__[i__] * sclfac;
+            zscale[i__ - 1] = z__[i__] * sclfac;
+        }
+        *tau *= sclfac;
+        lbd *= sclfac;
+        ubd *= sclfac;
+    } else {
+        for (i__ = 1; i__ <= 3; ++i__) {
+            dscale[i__ - 1] = d__[i__];
+            zscale[i__ - 1] = z__[i__];
+        }
+    }
+    fc = 0.;
+    df = 0.;
+    ddf = 0.;
+    for (i__ = 1; i__ <= 3; ++i__) {
+        temp = 1. / (dscale[i__ - 1] - *tau);
+        temp1 = zscale[i__ - 1] * temp;
+        temp2 = temp1 * temp;
+        temp3 = temp2 * temp;
+        fc += temp1 / dscale[i__ - 1];
+        df += temp2;
+        ddf += temp3;
+    }
+    f = *finit + *tau * fc;
+    if (abs(f) <= 0.) {
+        goto L60;
+    }
+    if (f <= 0.) {
+        lbd = *tau;
+    } else {
+        ubd = *tau;
+    }
+    iter = niter + 1;
+    for (niter = iter; niter <= 40; ++niter) {
+        if (*orgati) {
+            temp1 = dscale[1] - *tau;
+            temp2 = dscale[2] - *tau;
+        } else {
+            temp1 = dscale[0] - *tau;
+            temp2 = dscale[1] - *tau;
+        }
+        a = (temp1 + temp2) * f - temp1 * temp2 * df;
+        b = temp1 * temp2 * f;
+        c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
+        d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
+        temp = max(d__1, d__2);
+        a /= temp;
+        b /= temp;
+        c__ /= temp;
+        if (c__ == 0.) {
+            eta = b / a;
+        } else if (a <= 0.) {
+            eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
+        } else {
+            eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
+        }
+        if (f * eta >= 0.) {
+            eta = -f / df;
+        }
+        *tau += eta;
+        if (*tau < lbd || *tau > ubd) {
+            *tau = (lbd + ubd) / 2.;
+        }
+        fc = 0.;
+        erretm = 0.;
+        df = 0.;
+        ddf = 0.;
+        for (i__ = 1; i__ <= 3; ++i__) {
+            if (dscale[i__ - 1] - *tau != 0.) {
+                temp = 1. / (dscale[i__ - 1] - *tau);
+                temp1 = zscale[i__ - 1] * temp;
+                temp2 = temp1 * temp;
+                temp3 = temp2 * temp;
+                temp4 = temp1 / dscale[i__ - 1];
+                fc += temp4;
+                erretm += abs(temp4);
+                df += temp2;
+                ddf += temp3;
+            } else {
+                goto L60;
+            }
+        }
+        f = *finit + *tau * fc;
+        erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
+        if (abs(f) <= eps * 4. * erretm || ubd - lbd <= eps * 4. * abs(*tau)) {
+            goto L60;
+        }
+        if (f <= 0.) {
+            lbd = *tau;
+        } else {
+            ubd = *tau;
+        }
+    }
+    *info = 1;
+L60:
+    if (scale) {
+        *tau *= sclinv;
+    }
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/lib/linalg/dlaed6.f
+++ b/lib/linalg/dlaed6.f
@ -1,407 +0,0 @@
-*> \brief \b DLAED6 used by DSTEDC. Computes one Newton step in solution of the secular equation.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DLAED6 + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
-*
-*       .. Scalar Arguments ..
-*       LOGICAL            ORGATI
-*       INTEGER            INFO, KNITER
-*       DOUBLE PRECISION   FINIT, RHO, TAU
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION   D( 3 ), Z( 3 )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DLAED6 computes the positive or negative root (closest to the origin)
-*> of
-*>                  z(1)        z(2)        z(3)
-*> f(x) =   rho + --------- + ---------- + ---------
-*>                 d(1)-x      d(2)-x      d(3)-x
-*>
-*> It is assumed that
-*>
-*>       if ORGATI = .true. the root is between d(2) and d(3);
-*>       otherwise it is between d(1) and d(2)
-*>
-*> This routine will be called by DLAED4 when necessary. In most cases,
-*> the root sought is the smallest in magnitude, though it might not be
-*> in some extremely rare situations.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] KNITER
-*> \verbatim
-*>          KNITER is INTEGER
-*>               Refer to DLAED4 for its significance.
-*> \endverbatim
-*>
-*> \param[in] ORGATI
-*> \verbatim
-*>          ORGATI is LOGICAL
-*>               If ORGATI is true, the needed root is between d(2) and
-*>               d(3); otherwise it is between d(1) and d(2).  See
-*>               DLAED4 for further details.
-*> \endverbatim
-*>
-*> \param[in] RHO
-*> \verbatim
-*>          RHO is DOUBLE PRECISION
-*>               Refer to the equation f(x) above.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is DOUBLE PRECISION array, dimension (3)
-*>               D satisfies d(1) < d(2) < d(3).
-*> \endverbatim
-*>
-*> \param[in] Z
-*> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension (3)
-*>               Each of the elements in z must be positive.
-*> \endverbatim
-*>
-*> \param[in] FINIT
-*> \verbatim
-*>          FINIT is DOUBLE PRECISION
-*>               The value of f at 0. It is more accurate than the one
-*>               evaluated inside this routine (if someone wants to do
-*>               so).
-*> \endverbatim
-*>
-*> \param[out] TAU
-*> \verbatim
-*>          TAU is DOUBLE PRECISION
-*>               The root of the equation f(x).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>               = 0: successful exit
-*>               > 0: if INFO = 1, failure to converge
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \ingroup auxOTHERcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  10/02/03: This version has a few statements commented out for thread
-*>  safety (machine parameters are computed on each entry). SJH.
-*>
-*>  05/10/06: Modified from a new version of Ren-Cang Li, use
-*>     Gragg-Thornton-Warner cubic convergent scheme for better stability.
-*> \endverbatim
-*
-*> \par Contributors:
-*  ==================
-*>
-*>     Ren-Cang Li, Computer Science Division, University of California
-*>     at Berkeley, USA
-*>
-*  =====================================================================
-      SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
-*
-*  -- LAPACK computational routine --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-*     .. Scalar Arguments ..
-      LOGICAL            ORGATI
-      INTEGER            INFO, KNITER
-      DOUBLE PRECISION   FINIT, RHO, TAU
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION   D( 3 ), Z( 3 )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      INTEGER            MAXIT
-      PARAMETER          ( MAXIT = 40 )
-      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
-     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 )
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH
-      EXTERNAL           DLAMCH
-*     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   DSCALE( 3 ), ZSCALE( 3 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            SCALE
-      INTEGER            I, ITER, NITER
-      DOUBLE PRECISION   A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
-     $                   FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
-     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4,
-     $                   LBD, UBD
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      INFO = 0
-*
-      IF( ORGATI ) THEN
-         LBD = D(2)
-         UBD = D(3)
-      ELSE
-         LBD = D(1)
-         UBD = D(2)
-      END IF
-      IF( FINIT .LT. ZERO )THEN
-         LBD = ZERO
-      ELSE
-         UBD = ZERO
-      END IF
-*
-      NITER = 1
-      TAU = ZERO
-      IF( KNITER.EQ.2 ) THEN
-         IF( ORGATI ) THEN
-            TEMP = ( D( 3 )-D( 2 ) ) / TWO
-            C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
-            A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
-            B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
-         ELSE
-            TEMP = ( D( 1 )-D( 2 ) ) / TWO
-            C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
-            A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
-            B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
-         END IF
-         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
-         A = A / TEMP
-         B = B / TEMP
-         C = C / TEMP
-         IF( C.EQ.ZERO ) THEN
-            TAU = B / A
-         ELSE IF( A.LE.ZERO ) THEN
-            TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-         ELSE
-            TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
-         END IF
-         IF( TAU .LT. LBD .OR. TAU .GT. UBD )
-     $      TAU = ( LBD+UBD )/TWO
-         IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN
-            TAU = ZERO
-         ELSE
-            TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) +
-     $                     TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) +
-     $                     TAU*Z(3)/( D(3)*( D( 3 )-TAU ) )
-            IF( TEMP .LE. ZERO )THEN
-               LBD = TAU
-            ELSE
-               UBD = TAU
-            END IF
-            IF( ABS( FINIT ).LE.ABS( TEMP ) )
-     $         TAU = ZERO
-         END IF
-      END IF
-*
-*     get machine parameters for possible scaling to avoid overflow
-*
-*     modified by Sven: parameters SMALL1, SMINV1, SMALL2,
-*     SMINV2, EPS are not SAVEd anymore between one call to the
-*     others but recomputed at each call
-*
-      EPS = DLAMCH( 'Epsilon' )
-      BASE = DLAMCH( 'Base' )
-      SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) /
-     $         THREE ) )
-      SMINV1 = ONE / SMALL1
-      SMALL2 = SMALL1*SMALL1
-      SMINV2 = SMINV1*SMINV1
-*
-*     Determine if scaling of inputs necessary to avoid overflow
-*     when computing 1/TEMP**3
-*
-      IF( ORGATI ) THEN
-         TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
-      ELSE
-         TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
-      END IF
-      SCALE = .FALSE.
-      IF( TEMP.LE.SMALL1 ) THEN
-         SCALE = .TRUE.
-         IF( TEMP.LE.SMALL2 ) THEN
-*
-*        Scale up by power of radix nearest 1/SAFMIN**(2/3)
-*
-            SCLFAC = SMINV2
-            SCLINV = SMALL2
-         ELSE
-*
-*        Scale up by power of radix nearest 1/SAFMIN**(1/3)
-*
-            SCLFAC = SMINV1
-            SCLINV = SMALL1
-         END IF
-*
-*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
-*
-         DO 10 I = 1, 3
-            DSCALE( I ) = D( I )*SCLFAC
-            ZSCALE( I ) = Z( I )*SCLFAC
-   10    CONTINUE
-         TAU = TAU*SCLFAC
-         LBD = LBD*SCLFAC
-         UBD = UBD*SCLFAC
-      ELSE
-*
-*        Copy D and Z to DSCALE and ZSCALE
-*
-         DO 20 I = 1, 3
-            DSCALE( I ) = D( I )
-            ZSCALE( I ) = Z( I )
-   20    CONTINUE
-      END IF
-*
-      FC = ZERO
-      DF = ZERO
-      DDF = ZERO
-      DO 30 I = 1, 3
-         TEMP = ONE / ( DSCALE( I )-TAU )
-         TEMP1 = ZSCALE( I )*TEMP
-         TEMP2 = TEMP1*TEMP
-         TEMP3 = TEMP2*TEMP
-         FC = FC + TEMP1 / DSCALE( I )
-         DF = DF + TEMP2
-         DDF = DDF + TEMP3
-   30 CONTINUE
-      F = FINIT + TAU*FC
-*
-      IF( ABS( F ).LE.ZERO )
-     $   GO TO 60
-      IF( F .LE. ZERO )THEN
-         LBD = TAU
-      ELSE
-         UBD = TAU
-      END IF
-*
-*        Iteration begins -- Use Gragg-Thornton-Warner cubic convergent
-*                            scheme
-*
-*     It is not hard to see that
-*
-*           1) Iterations will go up monotonically
-*              if FINIT < 0;
-*
-*           2) Iterations will go down monotonically
-*              if FINIT > 0.
-*
-      ITER = NITER + 1
-*
-      DO 50 NITER = ITER, MAXIT
-*
-         IF( ORGATI ) THEN
-            TEMP1 = DSCALE( 2 ) - TAU
-            TEMP2 = DSCALE( 3 ) - TAU
-         ELSE
-            TEMP1 = DSCALE( 1 ) - TAU
-            TEMP2 = DSCALE( 2 ) - TAU
-         END IF
-         A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
-         B = TEMP1*TEMP2*F
-         C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
-         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
-         A = A / TEMP
-         B = B / TEMP
-         C = C / TEMP
-         IF( C.EQ.ZERO ) THEN
-            ETA = B / A
-         ELSE IF( A.LE.ZERO ) THEN
-            ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
-         ELSE
-            ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
-         END IF
-         IF( F*ETA.GE.ZERO ) THEN
-            ETA = -F / DF
-         END IF
-*
-         TAU = TAU + ETA
-         IF( TAU .LT. LBD .OR. TAU .GT. UBD )
-     $      TAU = ( LBD + UBD )/TWO
-*
-         FC = ZERO
-         ERRETM = ZERO
-         DF = ZERO
-         DDF = ZERO
-         DO 40 I = 1, 3
-            IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN
-               TEMP = ONE / ( DSCALE( I )-TAU )
-               TEMP1 = ZSCALE( I )*TEMP
-               TEMP2 = TEMP1*TEMP
-               TEMP3 = TEMP2*TEMP
-               TEMP4 = TEMP1 / DSCALE( I )
-               FC = FC + TEMP4
-               ERRETM = ERRETM + ABS( TEMP4 )
-               DF = DF + TEMP2
-               DDF = DDF + TEMP3
-            ELSE
-               GO TO 60
-            END IF
-   40    CONTINUE
-         F = FINIT + TAU*FC
-         ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
-     $            ABS( TAU )*DF
-         IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR.
-     $      ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) )  )
-     $      GO TO 60
-         IF( F .LE. ZERO )THEN
-            LBD = TAU
-         ELSE
-            UBD = TAU
-         END IF
-   50 CONTINUE
-      INFO = 1
-   60 CONTINUE
-*
-*     Undo scaling
-*
-      IF( SCALE )
-     $   TAU = TAU*SCLINV
-      RETURN
-*
-*     End of DLAED6
-*
-      END
--- a/lib/linalg/dlaed7.cpp
+++ b/lib/linalg/dlaed7.cpp
@ -0,0 +1,131 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__2 = 2;
+static integer c__1 = 1;
+static doublereal c_b10 = 1.;
+static doublereal c_b11 = 0.;
+static integer c_n1 = -1;
+int dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl,
+            integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
+            doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr,
+            integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work,
+            integer *iwork, integer *info)
+{
+    integer q_dim1, q_offset, i__1, i__2;
+    integer pow_lmp_ii(integer *, integer *);
+    integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr;
+    extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
+                      integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+                      ftnlen, ftnlen);
+    integer indxc, indxp;
+    extern int dlaed8_(integer *, integer *, integer *, integer *, doublereal *, doublereal *,
+                       integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+                       doublereal *, integer *, doublereal *, integer *, integer *, integer *,
+                       doublereal *, integer *, integer *, integer *),
+        dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *,
+                doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *),
+        dlaeda_(integer *, integer *, integer *, integer *, integer *, integer *, integer *,
+                integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+                integer *);
+    integer idlmda;
+    extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
+        xerbla_(char *, integer *, ftnlen);
+    integer coltyp;
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --qstore;
+    --qptr;
+    --prmptr;
+    --perm;
+    --givptr;
+    givcol -= 3;
+    givnum -= 3;
+    --work;
+    --iwork;
+    *info = 0;
+    if (*icompq < 0 || *icompq > 1) {
+        *info = -1;
+    } else if (*n < 0) {
+        *info = -2;
+    } else if (*icompq == 1 && *qsiz < *n) {
+        *info = -3;
+    } else if (*ldq < max(1, *n)) {
+        *info = -9;
+    } else if (min(1, *n) > *cutpnt || *n < *cutpnt) {
+        *info = -12;
+    }
+    if (*info != 0) {
+        i__1 = -(*info);
+        xerbla_((char *)"DLAED7", &i__1, (ftnlen)6);
+        return 0;
+    }
+    if (*n == 0) {
+        return 0;
+    }
+    if (*icompq == 1) {
+        ldq2 = *qsiz;
+    } else {
+        ldq2 = *n;
+    }
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+    is = iq2 + *n * ldq2;
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+    ptr = pow_lmp_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+        i__2 = *tlvls - i__;
+        ptr += pow_lmp_ii(&c__2, &i__2);
+    }
+    curr = ptr + *curpbm;
+    dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &givcol[3], &givnum[3],
+            &qstore[1], &qptr[1], &work[iz], &work[iz + *n], info);
+    if (*curlvl == *tlvls) {
+        qptr[curr] = 1;
+        prmptr[curr] = 1;
+        givptr[curr] = 1;
+    }
+    dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, cutpnt, &work[iz],
+            &work[idlmda], &work[iq2], &ldq2, &work[iw], &perm[prmptr[curr]], &givptr[curr + 1],
+            &givcol[(givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp],
+            &iwork[indx], info);
+    prmptr[curr + 1] = prmptr[curr] + *n;
+    givptr[curr + 1] += givptr[curr];
+    if (k != 0) {
+        dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], &work[iw],
+                &qstore[qptr[curr]], &k, info);
+        if (*info != 0) {
+            goto L30;
+        }
+        if (*icompq == 1) {
+            dgemm_((char *)"N", (char *)"N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[qptr[curr]], &k,
+                   &c_b11, &q[q_offset], ldq, (ftnlen)1, (ftnlen)1);
+        }
+        i__1 = k;
+        qptr[curr + 1] = qptr[curr] + i__1 * i__1;
+        n1 = k;
+        n2 = *n - k;
+        dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+        qptr[curr + 1] = qptr[curr];
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+            indxq[i__] = i__;
+        }
+    }
+L30:
+    return 0;
+}
+#ifdef __cplusplus
+}
+#endif
--- a/Show More
+++ b/Show More