Merge pull request #3579 from akohlmey/linalg-in-cpp

Convert linalg library from Fortran to C++
This commit is contained in:
Axel Kohlmeyer
2023-01-05 21:16:06 -05:00
committed by GitHub
431 changed files with 31249 additions and 63712 deletions

View File

@ -440,16 +440,12 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_LATTE OR
find_package(BLAS) find_package(BLAS)
endif() endif()
if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG) if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG)
include(CheckGeneratorSupport) file(GLOB LINALG_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp)
if(NOT CMAKE_GENERATOR_SUPPORT_FORTRAN) add_library(linalg STATIC ${LINALG_SOURCES})
status(FATAL_ERROR "Cannot build internal linear algebra library as CMake build tool lacks Fortran support")
endif()
enable_language(Fortran)
file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF])
add_library(linalg STATIC ${LAPACK_SOURCES})
set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE}) set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE})
set(BLAS_LIBRARIES "$<TARGET_FILE:linalg>") set(BLAS_LIBRARIES "$<TARGET_FILE:linalg>")
set(LAPACK_LIBRARIES "$<TARGET_FILE:linalg>") set(LAPACK_LIBRARIES "$<TARGET_FILE:linalg>")
target_link_libraries(lammps PRIVATE linalg)
else() else()
list(APPEND LAPACK_LIBRARIES ${BLAS_LIBRARIES}) list(APPEND LAPACK_LIBRARIES ${BLAS_LIBRARIES})
endif() endif()

View File

@ -72,7 +72,7 @@
"configurationType": "Debug", "configurationType": "Debug",
"buildRoot": "${workspaceRoot}\\build\\${name}", "buildRoot": "${workspaceRoot}\\build\\${name}",
"installRoot": "${workspaceRoot}\\install\\${name}", "installRoot": "${workspaceRoot}\\install\\${name}",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe", "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
"buildCommandArgs": "", "buildCommandArgs": "",
"ctestCommandArgs": "", "ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x64" ], "inheritEnvironments": [ "clang_cl_x64" ],
@ -105,7 +105,7 @@
"configurationType": "Release", "configurationType": "Release",
"buildRoot": "${workspaceRoot}\\build\\${name}", "buildRoot": "${workspaceRoot}\\build\\${name}",
"installRoot": "${workspaceRoot}\\install\\${name}", "installRoot": "${workspaceRoot}\\install\\${name}",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe", "cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
"buildCommandArgs": "", "buildCommandArgs": "",
"ctestCommandArgs": "-V", "ctestCommandArgs": "-V",
"inheritEnvironments": [ "clang_cl_x64" ], "inheritEnvironments": [ "clang_cl_x64" ],
@ -305,4 +305,4 @@
] ]
} }
] ]
} }

View File

@ -1,6 +1,7 @@
set(WIN_PACKAGES set(WIN_PACKAGES
AMOEBA AMOEBA
ASPHERE ASPHERE
AWPMD
BOCS BOCS
BODY BODY
BPM BPM
@ -20,6 +21,7 @@ set(WIN_PACKAGES
DPD-SMOOTH DPD-SMOOTH
DRUDE DRUDE
EFF EFF
ELECTRODE
EXTRA-COMPUTE EXTRA-COMPUTE
EXTRA-DUMP EXTRA-DUMP
EXTRA-FIX EXTRA-FIX
@ -35,6 +37,7 @@ set(WIN_PACKAGES
MEAM MEAM
MISC MISC
ML-IAP ML-IAP
ML-POD
ML-SNAP ML-SNAP
MOFFF MOFFF
MOLECULE MOLECULE

View File

@ -858,11 +858,11 @@ library.
.. code-block:: bash .. code-block:: bash
$ make lib-latte # print help message $ make lib-latte # print help message
$ make lib-latte args="-b" # download and build in lib/latte/LATTE-master $ make lib-latte args="-b" # download and build in lib/latte/LATTE-master
$ make lib-latte args="-p $HOME/latte" # use existing LATTE installation in $HOME/latte $ make lib-latte args="-p $HOME/latte" # use existing LATTE installation in $HOME/latte
$ make lib-latte args="-b -m gfortran" # download and build in lib/latte and $ make lib-latte args="-b -m gfortran" # download and build in lib/latte and
# copy Makefile.lammps.gfortran to Makefile.lammps # copy Makefile.lammps.gfortran to Makefile.lammps
Note that 3 symbolic (soft) links, ``includelink`` and ``liblink`` Note that 3 symbolic (soft) links, ``includelink`` and ``liblink``
and ``filelink.o``, are created in ``lib/latte`` to point to and ``filelink.o``, are created in ``lib/latte`` to point to
@ -1208,10 +1208,10 @@ The ATC package requires the MANYBODY package also be installed.
.. code-block:: bash .. code-block:: bash
$ make lib-linalg # print help message $ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") $ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") $ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler $ make lib-linalg args="-m g++" # build with GNU C++ compiler
---------- ----------
@ -1259,10 +1259,10 @@ AWPMD package
.. code-block:: bash .. code-block:: bash
$ make lib-linalg # print help message $ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") $ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") $ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler $ make lib-linalg args="-m g++" # build with GNU C++ compiler
---------- ----------
@ -1363,10 +1363,10 @@ This package depends on the KSPACE package.
.. code-block:: bash .. code-block:: bash
$ make lib-linalg # print help message $ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") $ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") $ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler $ make lib-linalg args="-m g++" # build with GNU C++ compiler
The package itself is activated with ``make yes-KSPACE`` and The package itself is activated with ``make yes-KSPACE`` and
``make yes-ELECTRODE`` ``make yes-ELECTRODE``
@ -1447,10 +1447,10 @@ ML-POD package
.. code-block:: bash .. code-block:: bash
$ make lib-linalg # print help message $ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") $ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") $ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler $ make lib-linalg args="-m g++" # build with GNU C++ compiler
The package itself is activated with ``make yes-ML-POD``. The package itself is activated with ``make yes-ML-POD``.

View File

@ -1,6 +1,6 @@
# Settings that the LAMMPS build will import when this package library is used # Settings that the LAMMPS build will import when this package library is used
atc_SYSINC = atc_SYSINC =
atc_SYSLIB = -llinalg -lgfortran atc_SYSLIB = -llinalg
atc_SYSPATH = -L../../lib/linalg$(LIBOBJDIR) atc_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -1,5 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used # Settings that the LAMMPS build will import when this package library is used
awpmd_SYSINC = awpmd_SYSINC =
awpmd_SYSLIB = -llinalg -lgfortran awpmd_SYSLIB = -llinalg
awpmd_SYSPATH = -L../../lib/linalg$(LIBOBJDIR) awpmd_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -1,19 +0,0 @@
# ifndef ERF_H
# define ERF_H
# ifdef _WIN32
# ifdef __cplusplus
extern "C" {
# endif
double erf(double x);
double erfc(double x);
# ifdef __cplusplus
}
# endif
# endif
# endif

View File

@ -1,53 +1,36 @@
// Interface for LAPACK function // Interface for LAPACK function
# ifndef LAPACK_INTER_H #ifndef LAPACK_INTER_H
# define LAPACK_INTER_H #define LAPACK_INTER_H
#include <complex> #include <complex>
typedef int lapack_int; typedef int lapack_int;
typedef complex<float> lapack_complex_float; typedef complex<float> lapack_complex_float;
typedef complex<double> lapack_complex_double; typedef complex<double> lapack_complex_double;
#if defined(_WIN32) && !defined(__MINGW32__) #define DGETRF dgetrf_
#define DGETRS dgetrs_
#define DGETRI dgetri_
#define ZPPTRF zpptrf_
#define ZPPTRI zpptri_
//#define MKL_Complex8 lapack_complex_float #ifdef __cplusplus
//#define MKL_Complex16 lapack_complex_double extern "C" {
#include "mkl.h" #endif /* __cplusplus */
void dgetrf_(const lapack_int *m, const lapack_int *n, double *a,
inline void ZPPTRF( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ) { const lapack_int *lda, lapack_int *ipiv, lapack_int *info);
ZPPTRF(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info); void dgetrs_(const char *trans, const lapack_int *n, const lapack_int *nrhs,
} const double *a, const lapack_int *lda, const lapack_int *ipiv,
inline void ZPPTRI( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ){ double *b, const lapack_int *ldb, lapack_int *info);
ZPPTRI(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info); void dgetri_(const lapack_int *n, double *a, const lapack_int *lda,
} const lapack_int *ipiv, double *work, const lapack_int *lwork,
lapack_int *info);
#else void zpptrf_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
lapack_int *info);
#define DGETRF dgetrf_ void zpptri_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
#define DGETRS dgetrs_ lapack_int *info);
#define DGETRI dgetri_ #ifdef __cplusplus
#define ZPPTRF zpptrf_ }
#define ZPPTRI zpptri_ #endif /* __cplusplus */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
void dgetrf_( const lapack_int* m, const lapack_int* n, double* a, const lapack_int* lda,
lapack_int* ipiv, lapack_int* info );
void dgetrs_( const char* trans, const lapack_int* n, const lapack_int* nrhs,
const double* a, const lapack_int* lda, const lapack_int* ipiv,
double* b, const lapack_int* ldb, lapack_int* info );
void dgetri_( const lapack_int* n, double* a, const lapack_int* lda,
const lapack_int* ipiv, double* work, const lapack_int* lwork,
lapack_int* info );
void zpptrf_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
lapack_int* info );
void zpptri_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
lapack_int* info );
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif
#endif /* lapack_intER_H */ #endif /* lapack_intER_H */

View File

@ -149,10 +149,8 @@
# include "pairhash.h" # include "pairhash.h"
# include "TCP/tcpdefs.h" # include "TCP/tcpdefs.h"
# include "wavepacket.h" # include "wavepacket.h"
# include "erf.h"
# include "cerf.h" # include "cerf.h"
using namespace std; using namespace std;
# include "lapack_inter.h" # include "lapack_inter.h"

View File

@ -1,6 +1,4 @@
# include "wpmd_split.h" # include "wpmd_split.h"
//# include "erf.h"
void AWPMD_split::resize(int flag){ void AWPMD_split::resize(int flag){
for(int s=0;s<2;s++){ for(int s=0;s<2;s++){

View File

@ -14,7 +14,7 @@
#include <algorithm> #include <algorithm>
// used to set the absolute path of a replica file // used to set the absolute path of a replica file
#if defined(WIN32) && !defined(__CYGWIN__) #if defined(_WIN32) && !defined(__CYGWIN__)
#include <direct.h> #include <direct.h>
#define CHDIR ::_chdir #define CHDIR ::_chdir
#define GETCWD ::_getcwd #define GETCWD ::_getcwd

View File

@ -8,10 +8,10 @@
// Colvars repository at GitHub. // Colvars repository at GitHub.
// Using access() to check if a file exists (until we can assume C++14/17) // Using access() to check if a file exists (until we can assume C++14/17)
#if !defined(WIN32) || defined(__CYGWIN__) #if !defined(_WIN32) || defined(__CYGWIN__)
#include <unistd.h> #include <unistd.h>
#endif #endif
#if defined(WIN32) #if defined(_WIN32)
#include <io.h> #include <io.h>
#endif #endif
@ -678,7 +678,7 @@ int colvarproxy_io::backup_file(char const *filename)
// Simplified version of NAMD_file_exists() // Simplified version of NAMD_file_exists()
int exit_code; int exit_code;
do { do {
#if defined(WIN32) && !defined(__CYGWIN__) #if defined(_WIN32) && !defined(__CYGWIN__)
// We could use _access_s here, but it is probably too new // We could use _access_s here, but it is probably too new
exit_code = _access(filename, 00); exit_code = _access(filename, 00);
#else #else
@ -708,7 +708,7 @@ int colvarproxy_io::backup_file(char const *filename)
int colvarproxy_io::remove_file(char const *filename) int colvarproxy_io::remove_file(char const *filename)
{ {
int error_code = COLVARS_OK; int error_code = COLVARS_OK;
#if defined(WIN32) && !defined(__CYGWIN__) #if defined(_WIN32) && !defined(__CYGWIN__)
// Because the file may be open by other processes, rename it to filename.old // Because the file may be open by other processes, rename it to filename.old
std::string const renamed_file(std::string(filename)+".old"); std::string const renamed_file(std::string(filename)+".old");
// It may still be there from an interrupted run, so remove it to be safe // It may still be there from an interrupted run, so remove it to be safe
@ -741,7 +741,7 @@ int colvarproxy_io::remove_file(char const *filename)
int colvarproxy_io::rename_file(char const *filename, char const *newfilename) int colvarproxy_io::rename_file(char const *filename, char const *newfilename)
{ {
int error_code = COLVARS_OK; int error_code = COLVARS_OK;
#if defined(WIN32) && !defined(__CYGWIN__) #if defined(_WIN32) && !defined(__CYGWIN__)
// On straight Windows, must remove the destination before renaming it // On straight Windows, must remove the destination before renaming it
error_code |= remove_file(newfilename); error_code |= remove_file(newfilename);
#endif #endif

View File

@ -1,5 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used # Settings that the LAMMPS build will import when this package library is used
electrode_SYSINC = electrode_SYSINC =
electrode_SYSLIB = -llinalg -lgfortran electrode_SYSLIB = -llinalg
electrode_SYSPATH = -L../../lib/linalg$(LIBOBJDIR) electrode_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -6,21 +6,17 @@ SHELL = /bin/sh
# ------ FILES ------ # ------ FILES ------
SRC = $(wildcard *.f) SRC = $(wildcard *.cpp)
SRC1 = $(wildcard *.F)
FILES = $(SRC) $(SRC1) Makefile.* README
# ------ DEFINITIONS ------ # ------ DEFINITIONS ------
LIB = liblinalg.a LIB = liblinalg.a
OBJ = $(SRC:.f=.o) $(SRC1:.F=.o) OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------ # ------ SETTINGS ------
FC = gfortran CXX = g++ -std=c++11
FFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore CCFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing
FFLAGS0 = -O0 -fPIC -fno-second-underscore
ARCHIVE = ar ARCHIVE = ar
AR = ar AR = ar
ARCHFLAG = -rcs ARCHFLAG = -rcs
@ -34,20 +30,11 @@ lib: $(OBJ)
# ------ COMPILE RULES ------ # ------ COMPILE RULES ------
%.o:%.F %.o:%.cpp
$(FC) $(FFLAGS) -c $< $(CC) $(CCFLAGS) -c $<
%.o:%.f
$(FC) $(FFLAGS) -c $<
dlamch.o: dlamch.f
$(FC) $(FFLAGS0) -c $<
# ------ CLEAN ------ # ------ CLEAN ------
clean: clean:
-rm -f *.o *.mod *~ $(LIB) -rm -f *.o *~ $(LIB)
tar:
-tar -czvf ../linalg.tar.gz $(FILES)

View File

@ -6,21 +6,17 @@ SHELL = /bin/sh
# ------ FILES ------ # ------ FILES ------
SRC = $(wildcard *.f) SRC = $(wildcard *.cpp)
SRC1 = $(wildcard *.F)
FILES = $(SRC) $(SRC1) Makefile.* README
# ------ DEFINITIONS ------ # ------ DEFINITIONS ------
LIB = liblinalg.a LIB = liblinalg.a
OBJ = $(SRC:.f=.o) $(SRC1:.F=.o) OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------ # ------ SETTINGS ------
FC = mpifort CC = mpicxx
FFLAGS = -O3 -fPIC CCFLAGS = -O3 -fPIC
FFLAGS0 = -O0 -fPIC
ARCHIVE = ar ARCHIVE = ar
AR = ar AR = ar
ARCHFLAG = -rcs ARCHFLAG = -rcs
@ -34,20 +30,11 @@ lib: $(OBJ)
# ------ COMPILE RULES ------ # ------ COMPILE RULES ------
%.o:%.F %.o:%.cpp
$(FC) $(FFLAGS) -c $< $(CC) $(CCFLAGS) -c $<
%.o:%.f
$(FC) $(FFLAGS) -c $<
dlamch.o: dlamch.f
$(FC) $(FFLAGS0) -c $<
# ------ CLEAN ------ # ------ CLEAN ------
clean: clean:
-rm -f *.o *.mod *~ $(LIB) -rm -f *.o *~ $(LIB)
tar:
-tar -czvf ../linalg.tar.gz $(FILES)

View File

@ -1 +1 @@
Makefile.gfortran Makefile.g++

View File

@ -1,7 +1,13 @@
This directory has generic BLAS and LAPACK source files needed by the This directory has generic BLAS and LAPACK source files needed by the
ATC, AWPMD, ELECTRODE, LATTE, and ML-POD packages (and possibly by other ATC, AWPMD, ELECTRODE, LATTE, and ML-POD packages (and possibly by other
packages) in the future that can be used instead of platform or vendor packages) in the future that can be used instead of platform or vendor
optimized BLAS/LAPACK library. optimized BLAS/LAPACK library. To simplify installation, these files
have been translated from the Fortran versions of the BLAS and LAPACK
references source files at https://netlib.org/lapack/ to C++ with f2c.
The package with the tools to do the translation and the matching
original Fortran sources are at https://github.com/lammps/linalg.
Please note that even through the files are C++ source code the
resulting library will follow the Fortran binary conventions.
Note that this is an *incomplete* subset of full BLAS/LAPACK. Note that this is an *incomplete* subset of full BLAS/LAPACK.
@ -20,7 +26,7 @@ can do it manually by following the instructions below.
Build the library using one of the provided Makefile.* files or create Build the library using one of the provided Makefile.* files or create
your own, specific to your compiler and system. For example: your own, specific to your compiler and system. For example:
make -f Makefile.gfortran make -f Makefile.g++
When you are done building this library, one file should exist in this When you are done building this library, one file should exist in this
directory: directory:

13
lib/linalg/d_lmp_cnjg.cpp Normal file
View File

@ -0,0 +1,13 @@
#include "lmp_f2c.h"
extern "C" {
void d_lmp_cnjg(doublecomplex *r, doublecomplex *z)
{
doublereal zi = z->i;
r->r = z->r;
r->i = -zi;
}
}

10
lib/linalg/d_lmp_imag.cpp Normal file
View File

@ -0,0 +1,10 @@
#include "lmp_f2c.h"
extern "C" {
double d_lmp_imag(doublecomplex *z)
{
return (z->i);
}
}

14
lib/linalg/d_lmp_lg10.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "lmp_f2c.h"
#undef abs
static constexpr double log10e = 0.43429448190325182765;
#include <cmath>
extern "C" {
double d_lmp_lg10(doublereal *x)
{
return (log10e * log(*x));
}
}

12
lib/linalg/d_lmp_sign.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "lmp_f2c.h"
extern "C" {
double d_lmp_sign(doublereal *a, doublereal *b)
{
double x;
x = (*a >= 0 ? *a : -*a);
return (*b >= 0 ? x : -x);
}
}

50
lib/linalg/dasum.cpp Normal file
View File

@ -0,0 +1,50 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal dasum_(integer *n, doublereal *dx, integer *incx)
{
integer i__1, i__2;
doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
integer i__, m, mp1;
doublereal dtemp;
integer nincx;
--dx;
ret_val = 0.;
dtemp = 0.;
if (*n <= 0 || *incx <= 0) {
return ret_val;
}
if (*incx == 1) {
m = *n % 6;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += (d__1 = dx[i__], abs(d__1));
}
if (*n < 6) {
ret_val = dtemp;
return ret_val;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 6) {
dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], abs(d__2)) +
(d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + 3], abs(d__4)) +
(d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = dx[i__ + 5], abs(d__6));
}
} else {
nincx = *n * *incx;
i__1 = nincx;
i__2 = *incx;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
dtemp += (d__1 = dx[i__], abs(d__1));
}
}
ret_val = dtemp;
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,131 +0,0 @@
*> \brief \b DASUM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
*
* .. Scalar Arguments ..
* INTEGER INCX,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DASUM takes the sum of the absolute values.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 3/93 to return if incx .le. 0.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
DOUBLE PRECISION DTEMP
INTEGER I,M,MP1,NINCX
* ..
* .. Intrinsic Functions ..
INTRINSIC DABS,MOD
* ..
DASUM = 0.0d0
DTEMP = 0.0d0
IF (N.LE.0 .OR. INCX.LE.0) RETURN
IF (INCX.EQ.1) THEN
* code for increment equal to 1
*
*
* clean-up loop
*
M = MOD(N,6)
IF (M.NE.0) THEN
DO I = 1,M
DTEMP = DTEMP + DABS(DX(I))
END DO
IF (N.LT.6) THEN
DASUM = DTEMP
RETURN
END IF
END IF
MP1 = M + 1
DO I = MP1,N,6
DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
$ DABS(DX(I+2)) + DABS(DX(I+3)) +
$ DABS(DX(I+4)) + DABS(DX(I+5))
END DO
ELSE
*
* code for increment not equal to 1
*
NINCX = N*INCX
DO I = 1,NINCX,INCX
DTEMP = DTEMP + DABS(DX(I))
END DO
END IF
DASUM = DTEMP
RETURN
*
* End of DASUM
*
END

56
lib/linalg/daxpy.cpp Normal file
View File

@ -0,0 +1,56 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
integer i__, m, ix, iy, mp1;
--dy;
--dx;
if (*n <= 0) {
return 0;
}
if (*da == 0.) {
return 0;
}
if (*incx == 1 && *incy == 1) {
m = *n % 4;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[i__] += *da * dx[i__];
}
}
if (*n < 4) {
return 0;
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 4) {
dy[i__] += *da * dx[i__];
dy[i__ + 1] += *da * dx[i__ + 1];
dy[i__ + 2] += *da * dx[i__ + 2];
dy[i__ + 3] += *da * dx[i__ + 3];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[iy] += *da * dx[ix];
ix += *incx;
iy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,152 +0,0 @@
*> \brief \b DAXPY
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION DA
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DAXPY constant times a vector plus a vector.
*> uses unrolled loops for increments equal to one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DA
*> \verbatim
*> DA is DOUBLE PRECISION
*> On entry, DA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[in,out] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION DA
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
IF (N.LE.0) RETURN
IF (DA.EQ.0.0d0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,4)
IF (M.NE.0) THEN
DO I = 1,M
DY(I) = DY(I) + DA*DX(I)
END DO
END IF
IF (N.LT.4) RETURN
MP1 = M + 1
DO I = MP1,N,4
DY(I) = DY(I) + DA*DX(I)
DY(I+1) = DY(I+1) + DA*DX(I+1)
DY(I+2) = DY(I+2) + DA*DX(I+2)
DY(I+3) = DY(I+3) + DA*DX(I+3)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DY(IY) = DY(IY) + DA*DX(IX)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
RETURN
*
* End of DAXPY
*
END

522
lib/linalg/dbdsqr.cpp Normal file
View File

@ -0,0 +1,522 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b15 = -.125;
static integer c__1 = 1;
static doublereal c_b49 = 1.;
static doublereal c_b72 = -1.;
int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, doublereal *d__,
doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu,
doublereal *c__, integer *ldc, doublereal *work, integer *info, ftnlen uplo_len)
{
integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
doublereal d__1, d__2, d__3, d__4;
double pow_lmp_dd(doublereal *, doublereal *), sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
integer iterdivn;
doublereal f, g, h__;
integer i__, j, m;
doublereal r__;
integer maxitdivn;
doublereal cs;
integer ll;
doublereal sn, mu;
integer nm1, nm12, nm13, lll;
doublereal eps, sll, tol, abse;
integer idir;
doublereal abss;
integer oldm;
doublereal cosl;
integer isub, iter;
doublereal unfl, sinl, cosr, smin, smax, sinr;
extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
doublereal *),
dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
dscal_(integer *, doublereal *, doublereal *, integer *);
extern logical lsame_(char *, char *, ftnlen, ftnlen);
doublereal oldcs;
extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen, ftnlen);
integer oldll;
doublereal shift, sigmn, oldsn;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
doublereal sminl, sigmx;
logical lower;
extern int dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *),
dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *);
extern doublereal dlamch_(char *, ftnlen);
extern int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
xerbla_(char *, integer *, ftnlen);
doublereal sminoa, thresh;
logical rotate;
doublereal tolmul;
--d__;
--e;
vt_dim1 = *ldvt;
vt_offset = 1 + vt_dim1;
vt -= vt_offset;
u_dim1 = *ldu;
u_offset = 1 + u_dim1;
u -= u_offset;
c_dim1 = *ldc;
c_offset = 1 + c_dim1;
c__ -= c_offset;
--work;
*info = 0;
lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1);
if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lower) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*ncvt < 0) {
*info = -3;
} else if (*nru < 0) {
*info = -4;
} else if (*ncc < 0) {
*info = -5;
} else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1, *n)) {
*info = -9;
} else if (*ldu < max(1, *nru)) {
*info = -11;
} else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1, *n)) {
*info = -13;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DBDSQR", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
if (*n == 1) {
goto L160;
}
rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
if (!rotate) {
dlasq1_(n, &d__[1], &e[1], &work[1], info);
if (*info != 2) {
return 0;
}
*info = 0;
}
nm1 = *n - 1;
nm12 = nm1 + nm1;
nm13 = nm12 + nm1;
idir = 0;
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
unfl = dlamch_((char *)"Safe minimum", (ftnlen)12);
if (lower) {
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
d__[i__] = r__;
e[i__] = sn * d__[i__ + 1];
d__[i__ + 1] = cs * d__[i__ + 1];
work[i__] = cs;
work[nm1 + i__] = sn;
}
if (*nru > 0) {
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1,
(ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc, (ftnlen)1,
(ftnlen)1, (ftnlen)1);
}
}
d__3 = 100., d__4 = pow_lmp_dd(&eps, &c_b15);
d__1 = 10., d__2 = min(d__3, d__4);
tolmul = max(d__1, d__2);
tol = tolmul * eps;
smax = 0.;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
smax = max(d__2, d__3);
}
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
smax = max(d__2, d__3);
}
sminl = 0.;
if (tol >= 0.) {
sminoa = abs(d__[1]);
if (sminoa == 0.) {
goto L50;
}
mu = sminoa;
i__1 = *n;
for (i__ = 2; i__ <= i__1; ++i__) {
mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1], abs(d__1))));
sminoa = min(sminoa, mu);
if (sminoa == 0.) {
goto L50;
}
}
L50:
sminoa /= sqrt((doublereal)(*n));
d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6;
thresh = max(d__1, d__2);
} else {
d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6;
thresh = max(d__1, d__2);
}
maxitdivn = *n * 6;
iterdivn = 0;
iter = -1;
oldll = -1;
oldm = -1;
m = *n;
L60:
if (m <= 1) {
goto L160;
}
if (iter >= *n) {
iter -= *n;
++iterdivn;
if (iterdivn >= maxitdivn) {
goto L200;
}
}
if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
d__[m] = 0.;
}
smax = (d__1 = d__[m], abs(d__1));
smin = smax;
i__1 = m - 1;
for (lll = 1; lll <= i__1; ++lll) {
ll = m - lll;
abss = (d__1 = d__[ll], abs(d__1));
abse = (d__1 = e[ll], abs(d__1));
if (tol < 0. && abss <= thresh) {
d__[ll] = 0.;
}
if (abse <= thresh) {
goto L80;
}
smin = min(smin, abss);
d__1 = max(smax, abss);
smax = max(d__1, abse);
}
ll = 0;
goto L90;
L80:
e[ll] = 0.;
if (ll == m - 1) {
--m;
goto L60;
}
L90:
++ll;
if (ll == m - 1) {
dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl);
d__[m - 1] = sigmx;
e[m - 1] = 0.;
d__[m] = sigmn;
if (*ncvt > 0) {
drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &cosr, &sinr);
}
if (*nru > 0) {
drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &c__1, &cosl, &sinl);
}
if (*ncc > 0) {
drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &cosl, &sinl);
}
m += -2;
goto L60;
}
if (ll > oldm || m < oldll) {
if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
idir = 1;
} else {
idir = 2;
}
}
if (idir == 1) {
if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(d__1)) ||
tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) {
e[m - 1] = 0.;
goto L60;
}
if (tol >= 0.) {
mu = (d__1 = d__[ll], abs(d__1));
sminl = mu;
i__1 = m - 1;
for (lll = ll; lll <= i__1; ++lll) {
if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
e[lll] = 0.;
goto L60;
}
mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
sminl = min(sminl, mu);
}
}
} else {
if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)) ||
tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
e[ll] = 0.;
goto L60;
}
if (tol >= 0.) {
mu = (d__1 = d__[m], abs(d__1));
sminl = mu;
i__1 = ll;
for (lll = m - 1; lll >= i__1; --lll) {
if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
e[lll] = 0.;
goto L60;
}
mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
sminl = min(sminl, mu);
}
}
}
oldll = ll;
oldm = m;
d__1 = eps, d__2 = tol * .01;
if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1, d__2)) {
shift = 0.;
} else {
if (idir == 1) {
sll = (d__1 = d__[ll], abs(d__1));
dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
} else {
sll = (d__1 = d__[m], abs(d__1));
dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
}
if (sll > 0.) {
d__1 = shift / sll;
if (d__1 * d__1 < eps) {
shift = 0.;
}
}
}
iter = iter + m - ll;
if (shift == 0.) {
if (idir == 1) {
cs = 1.;
oldcs = 1.;
i__1 = m - 1;
for (i__ = ll; i__ <= i__1; ++i__) {
d__1 = d__[i__] * cs;
dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
if (i__ > ll) {
e[i__ - 1] = oldsn * r__;
}
d__1 = oldcs * r__;
d__2 = d__[i__ + 1] * sn;
dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
work[i__ - ll + 1] = cs;
work[i__ - ll + 1 + nm1] = sn;
work[i__ - ll + 1 + nm12] = oldcs;
work[i__ - ll + 1 + nm13] = oldsn;
}
h__ = d__[m] * cs;
d__[m] = h__ * oldcs;
e[m - 1] = h__ * oldsn;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
&u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
&c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
e[m - 1] = 0.;
}
} else {
cs = 1.;
oldcs = 1.;
i__1 = ll + 1;
for (i__ = m; i__ >= i__1; --i__) {
d__1 = d__[i__] * cs;
dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
if (i__ < m) {
e[i__] = oldsn * r__;
}
d__1 = oldcs * r__;
d__2 = d__[i__ - 1] * sn;
dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
work[i__ - ll] = cs;
work[i__ - ll + nm1] = -sn;
work[i__ - ll + nm12] = oldcs;
work[i__ - ll + nm13] = -oldsn;
}
h__ = d__[ll] * cs;
d__[ll] = h__ * oldcs;
e[ll] = h__ * oldsn;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
&vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[ll], abs(d__1)) <= thresh) {
e[ll] = 0.;
}
}
} else {
if (idir == 1) {
f = ((d__1 = d__[ll], abs(d__1)) - shift) *
(d_lmp_sign(&c_b49, &d__[ll]) + shift / d__[ll]);
g = e[ll];
i__1 = m - 1;
for (i__ = ll; i__ <= i__1; ++i__) {
dlartg_(&f, &g, &cosr, &sinr, &r__);
if (i__ > ll) {
e[i__ - 1] = r__;
}
f = cosr * d__[i__] + sinr * e[i__];
e[i__] = cosr * e[i__] - sinr * d__[i__];
g = sinr * d__[i__ + 1];
d__[i__ + 1] = cosr * d__[i__ + 1];
dlartg_(&f, &g, &cosl, &sinl, &r__);
d__[i__] = r__;
f = cosl * e[i__] + sinl * d__[i__ + 1];
d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
if (i__ < m - 1) {
g = sinl * e[i__ + 1];
e[i__ + 1] = cosl * e[i__ + 1];
}
work[i__ - ll + 1] = cosr;
work[i__ - ll + 1 + nm1] = sinr;
work[i__ - ll + 1 + nm12] = cosl;
work[i__ - ll + 1 + nm13] = sinl;
}
e[m - 1] = f;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
&u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
&c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
e[m - 1] = 0.;
}
} else {
f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_lmp_sign(&c_b49, &d__[m]) + shift / d__[m]);
g = e[m - 1];
i__1 = ll + 1;
for (i__ = m; i__ >= i__1; --i__) {
dlartg_(&f, &g, &cosr, &sinr, &r__);
if (i__ < m) {
e[i__] = r__;
}
f = cosr * d__[i__] + sinr * e[i__ - 1];
e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
g = sinr * d__[i__ - 1];
d__[i__ - 1] = cosr * d__[i__ - 1];
dlartg_(&f, &g, &cosl, &sinl, &r__);
d__[i__] = r__;
f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
if (i__ > ll + 1) {
g = sinl * e[i__ - 2];
e[i__ - 2] = cosl * e[i__ - 2];
}
work[i__ - ll] = cosr;
work[i__ - ll + nm1] = -sinr;
work[i__ - ll + nm12] = cosl;
work[i__ - ll + nm13] = -sinl;
}
e[ll] = f;
if ((d__1 = e[ll], abs(d__1)) <= thresh) {
e[ll] = 0.;
}
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
&vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
}
}
goto L60;
L160:
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (d__[i__] < 0.) {
d__[i__] = -d__[i__];
if (*ncvt > 0) {
dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
}
}
}
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
isub = 1;
smin = d__[1];
i__2 = *n + 1 - i__;
for (j = 2; j <= i__2; ++j) {
if (d__[j] <= smin) {
isub = j;
smin = d__[j];
}
}
if (isub != *n + 1 - i__) {
d__[isub] = d__[*n + 1 - i__];
d__[*n + 1 - i__] = smin;
if (*ncvt > 0) {
dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt);
}
if (*nru > 0) {
dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1);
}
if (*ncc > 0) {
dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc);
}
}
}
goto L220;
L200:
*info = 0;
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
if (e[i__] != 0.) {
++(*info);
}
}
L220:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,864 +0,0 @@
*> \brief \b DBDSQR
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DBDSQR + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
* LDU, C, LDC, WORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
* ..
* .. Array Arguments ..
* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ),
* $ VT( LDVT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DBDSQR computes the singular values and, optionally, the right and/or
*> left singular vectors from the singular value decomposition (SVD) of
*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
*> zero-shift QR algorithm. The SVD of B has the form
*>
*> B = Q * S * P**T
*>
*> where S is the diagonal matrix of singular values, Q is an orthogonal
*> matrix of left singular vectors, and P is an orthogonal matrix of
*> right singular vectors. If left singular vectors are requested, this
*> subroutine actually returns U*Q instead of Q, and, if right singular
*> vectors are requested, this subroutine returns P**T*VT instead of
*> P**T, for given real input matrices U and VT. When U and VT are the
*> orthogonal matrices that reduce a general matrix A to bidiagonal
*> form: A = U*B*VT, as computed by DGEBRD, then
*>
*> A = (U*Q) * S * (P**T*VT)
*>
*> is the SVD of A. Optionally, the subroutine may also compute Q**T*C
*> for a given real input matrix C.
*>
*> See "Computing Small Singular Values of Bidiagonal Matrices With
*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
*> no. 5, pp. 873-912, Sept 1990) and
*> "Accurate singular values and differential qd algorithms," by
*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
*> Department, University of California at Berkeley, July 1992
*> for a detailed description of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> = 'U': B is upper bidiagonal;
*> = 'L': B is lower bidiagonal.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix B. N >= 0.
*> \endverbatim
*>
*> \param[in] NCVT
*> \verbatim
*> NCVT is INTEGER
*> The number of columns of the matrix VT. NCVT >= 0.
*> \endverbatim
*>
*> \param[in] NRU
*> \verbatim
*> NRU is INTEGER
*> The number of rows of the matrix U. NRU >= 0.
*> \endverbatim
*>
*> \param[in] NCC
*> \verbatim
*> NCC is INTEGER
*> The number of columns of the matrix C. NCC >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the n diagonal elements of the bidiagonal matrix B.
*> On exit, if INFO=0, the singular values of B in decreasing
*> order.
*> \endverbatim
*>
*> \param[in,out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (N-1)
*> On entry, the N-1 offdiagonal elements of the bidiagonal
*> matrix B.
*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
*> will contain the diagonal and superdiagonal elements of a
*> bidiagonal matrix orthogonally equivalent to the one given
*> as input.
*> \endverbatim
*>
*> \param[in,out] VT
*> \verbatim
*> VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
*> On entry, an N-by-NCVT matrix VT.
*> On exit, VT is overwritten by P**T * VT.
*> Not referenced if NCVT = 0.
*> \endverbatim
*>
*> \param[in] LDVT
*> \verbatim
*> LDVT is INTEGER
*> The leading dimension of the array VT.
*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
*> \endverbatim
*>
*> \param[in,out] U
*> \verbatim
*> U is DOUBLE PRECISION array, dimension (LDU, N)
*> On entry, an NRU-by-N matrix U.
*> On exit, U is overwritten by U * Q.
*> Not referenced if NRU = 0.
*> \endverbatim
*>
*> \param[in] LDU
*> \verbatim
*> LDU is INTEGER
*> The leading dimension of the array U. LDU >= max(1,NRU).
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is DOUBLE PRECISION array, dimension (LDC, NCC)
*> On entry, an N-by-NCC matrix C.
*> On exit, C is overwritten by Q**T * C.
*> Not referenced if NCC = 0.
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> The leading dimension of the array C.
*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*(N-1))
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: If INFO = -i, the i-th argument had an illegal value
*> > 0:
*> if NCVT = NRU = NCC = 0,
*> = 1, a split was marked by a positive value in E
*> = 2, current block of Z not diagonalized after 30*N
*> iterations (in inner while loop)
*> = 3, termination criterion of outer while loop not met
*> (program created more than N unreduced blocks)
*> else NCVT = NRU = NCC = 0,
*> the algorithm did not converge; D and E contain the
*> elements of a bidiagonal matrix which is orthogonally
*> similar to the input matrix B; if INFO = i, i
*> elements of E have not converged to zero.
*> \endverbatim
*
*> \par Internal Parameters:
* =========================
*>
*> \verbatim
*> TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
*> TOLMUL controls the convergence criterion of the QR loop.
*> If it is positive, TOLMUL*EPS is the desired relative
*> precision in the computed singular values.
*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the
*> desired absolute accuracy in the computed singular
*> values (corresponds to relative accuracy
*> abs(TOLMUL*EPS) in the largest singular value.
*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably
*> between 10 (for fast convergence) and .1/EPS
*> (for there to be some accuracy in the results).
*> Default is to lose at either one eighth or 2 of the
*> available decimal digits in each computed singular value
*> (whichever is smaller).
*>
*> MAXITR INTEGER, default = 6
*> MAXITR controls the maximum number of passes of the
*> algorithm through its inner loop. The algorithms stops
*> (and so fails to converge) if the number of passes
*> through the inner loop exceeds MAXITR*N**2.
*>
*> \endverbatim
*
*> \par Note:
* ===========
*>
*> \verbatim
*> Bug report from Cezary Dendek.
*> On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is
*> removed since it can overflow pretty easily (for N larger or equal
*> than 18,919). We instead use MAXITDIVN = MAXITR*N.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
* =====================================================================
SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
$ LDU, C, LDC, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
* ..
* .. Array Arguments ..
DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ),
$ VT( LDVT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION NEGONE
PARAMETER ( NEGONE = -1.0D0 )
DOUBLE PRECISION HNDRTH
PARAMETER ( HNDRTH = 0.01D0 )
DOUBLE PRECISION TEN
PARAMETER ( TEN = 10.0D0 )
DOUBLE PRECISION HNDRD
PARAMETER ( HNDRD = 100.0D0 )
DOUBLE PRECISION MEIGTH
PARAMETER ( MEIGTH = -0.125D0 )
INTEGER MAXITR
PARAMETER ( MAXITR = 6 )
* ..
* .. Local Scalars ..
LOGICAL LOWER, ROTATE
INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M,
$ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM
DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
$ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
$ SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
$ SN, THRESH, TOL, TOLMUL, UNFL
* ..
* .. External Functions ..
LOGICAL LSAME
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
$ DSCAL, DSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
LOWER = LSAME( UPLO, 'L' )
IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NCVT.LT.0 ) THEN
INFO = -3
ELSE IF( NRU.LT.0 ) THEN
INFO = -4
ELSE IF( NCC.LT.0 ) THEN
INFO = -5
ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
$ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
INFO = -9
ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
INFO = -11
ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
$ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
INFO = -13
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DBDSQR', -INFO )
RETURN
END IF
IF( N.EQ.0 )
$ RETURN
IF( N.EQ.1 )
$ GO TO 160
*
* ROTATE is true if any singular vectors desired, false otherwise
*
ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
*
* If no singular vectors desired, use qd algorithm
*
IF( .NOT.ROTATE ) THEN
CALL DLASQ1( N, D, E, WORK, INFO )
*
* If INFO equals 2, dqds didn't finish, try to finish
*
IF( INFO .NE. 2 ) RETURN
INFO = 0
END IF
*
NM1 = N - 1
NM12 = NM1 + NM1
NM13 = NM12 + NM1
IDIR = 0
*
* Get machine constants
*
EPS = DLAMCH( 'Epsilon' )
UNFL = DLAMCH( 'Safe minimum' )
*
* If matrix lower bidiagonal, rotate to be upper bidiagonal
* by applying Givens rotations on the left
*
IF( LOWER ) THEN
DO 10 I = 1, N - 1
CALL DLARTG( D( I ), E( I ), CS, SN, R )
D( I ) = R
E( I ) = SN*D( I+1 )
D( I+1 ) = CS*D( I+1 )
WORK( I ) = CS
WORK( NM1+I ) = SN
10 CONTINUE
*
* Update singular vectors if desired
*
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
$ LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
$ LDC )
END IF
*
* Compute singular values to relative accuracy TOL
* (By setting TOL to be negative, algorithm will compute
* singular values to absolute accuracy ABS(TOL)*norm(input matrix))
*
TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
TOL = TOLMUL*EPS
*
* Compute approximate maximum, minimum singular values
*
SMAX = ZERO
DO 20 I = 1, N
SMAX = MAX( SMAX, ABS( D( I ) ) )
20 CONTINUE
DO 30 I = 1, N - 1
SMAX = MAX( SMAX, ABS( E( I ) ) )
30 CONTINUE
SMINL = ZERO
IF( TOL.GE.ZERO ) THEN
*
* Relative accuracy desired
*
SMINOA = ABS( D( 1 ) )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
MU = SMINOA
DO 40 I = 2, N
MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
SMINOA = MIN( SMINOA, MU )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
40 CONTINUE
50 CONTINUE
SMINOA = SMINOA / SQRT( DBLE( N ) )
THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) )
ELSE
*
* Absolute accuracy desired
*
THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) )
END IF
*
* Prepare for main iteration loop for the singular values
* (MAXIT is the maximum number of passes through the inner
* loop permitted before nonconvergence signalled.)
*
MAXITDIVN = MAXITR*N
ITERDIVN = 0
ITER = -1
OLDLL = -1
OLDM = -1
*
* M points to last element of unconverged part of matrix
*
M = N
*
* Begin main iteration loop
*
60 CONTINUE
*
* Check for convergence or exceeding iteration count
*
IF( M.LE.1 )
$ GO TO 160
*
IF( ITER.GE.N ) THEN
ITER = ITER - N
ITERDIVN = ITERDIVN + 1
IF( ITERDIVN.GE.MAXITDIVN )
$ GO TO 200
END IF
*
* Find diagonal block of matrix to work on
*
IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
$ D( M ) = ZERO
SMAX = ABS( D( M ) )
SMIN = SMAX
DO 70 LLL = 1, M - 1
LL = M - LLL
ABSS = ABS( D( LL ) )
ABSE = ABS( E( LL ) )
IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
$ D( LL ) = ZERO
IF( ABSE.LE.THRESH )
$ GO TO 80
SMIN = MIN( SMIN, ABSS )
SMAX = MAX( SMAX, ABSS, ABSE )
70 CONTINUE
LL = 0
GO TO 90
80 CONTINUE
E( LL ) = ZERO
*
* Matrix splits since E(LL) = 0
*
IF( LL.EQ.M-1 ) THEN
*
* Convergence of bottom singular value, return to top of loop
*
M = M - 1
GO TO 60
END IF
90 CONTINUE
LL = LL + 1
*
* E(LL) through E(M-1) are nonzero, E(LL-1) is zero
*
IF( LL.EQ.M-1 ) THEN
*
* 2 by 2 block, handle separately
*
CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
$ COSR, SINL, COSL )
D( M-1 ) = SIGMX
E( M-1 ) = ZERO
D( M ) = SIGMN
*
* Compute singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
$ SINR )
IF( NRU.GT.0 )
$ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
IF( NCC.GT.0 )
$ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
$ SINL )
M = M - 2
GO TO 60
END IF
*
* If working on new submatrix, choose shift direction
* (from larger end diagonal element towards smaller)
*
IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
*
* Chase bulge from top (big end) to bottom (small end)
*
IDIR = 1
ELSE
*
* Chase bulge from bottom (big end) to top (small end)
*
IDIR = 2
END IF
END IF
*
* Apply convergence tests
*
IF( IDIR.EQ.1 ) THEN
*
* Run convergence test in forward direction
* First apply standard test to bottom of matrix
*
IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
E( M-1 ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion forward
*
MU = ABS( D( LL ) )
SMINL = MU
DO 100 LLL = LL, M - 1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
100 CONTINUE
END IF
*
ELSE
*
* Run convergence test in backward direction
* First apply standard test to top of matrix
*
IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
E( LL ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion backward
*
MU = ABS( D( M ) )
SMINL = MU
DO 110 LLL = M - 1, LL, -1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
110 CONTINUE
END IF
END IF
OLDLL = LL
OLDM = M
*
* Compute shift. First, test if shifting would ruin relative
* accuracy, and if so set the shift to zero.
*
IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
$ MAX( EPS, HNDRTH*TOL ) ) THEN
*
* Use a zero shift to avoid loss of relative accuracy
*
SHIFT = ZERO
ELSE
*
* Compute the shift from 2-by-2 block at end of matrix
*
IF( IDIR.EQ.1 ) THEN
SLL = ABS( D( LL ) )
CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
ELSE
SLL = ABS( D( M ) )
CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
END IF
*
* Test if shift negligible, and if so set to zero
*
IF( SLL.GT.ZERO ) THEN
IF( ( SHIFT / SLL )**2.LT.EPS )
$ SHIFT = ZERO
END IF
END IF
*
* Increment iteration count
*
ITER = ITER + M - LL
*
* If SHIFT = 0, do simplified QR iteration
*
IF( SHIFT.EQ.ZERO ) THEN
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 120 I = LL, M - 1
CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
IF( I.GT.LL )
$ E( I-1 ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL+1 ) = CS
WORK( I-LL+1+NM1 ) = SN
WORK( I-LL+1+NM12 ) = OLDCS
WORK( I-LL+1+NM13 ) = OLDSN
120 CONTINUE
H = D( M )*CS
D( M ) = H*OLDCS
E( M-1 ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 130 I = M, LL + 1, -1
CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
IF( I.LT.M )
$ E( I ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL ) = CS
WORK( I-LL+NM1 ) = -SN
WORK( I-LL+NM12 ) = OLDCS
WORK( I-LL+NM13 ) = -OLDSN
130 CONTINUE
H = D( LL )*CS
D( LL ) = H*OLDCS
E( LL ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
END IF
ELSE
*
* Use nonzero shift
*
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( LL ) )-SHIFT )*
$ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
G = E( LL )
DO 140 I = LL, M - 1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.GT.LL )
$ E( I-1 ) = R
F = COSR*D( I ) + SINR*E( I )
E( I ) = COSR*E( I ) - SINR*D( I )
G = SINR*D( I+1 )
D( I+1 ) = COSR*D( I+1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I ) + SINL*D( I+1 )
D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
IF( I.LT.M-1 ) THEN
G = SINL*E( I+1 )
E( I+1 ) = COSL*E( I+1 )
END IF
WORK( I-LL+1 ) = COSR
WORK( I-LL+1+NM1 ) = SINR
WORK( I-LL+1+NM12 ) = COSL
WORK( I-LL+1+NM13 ) = SINL
140 CONTINUE
E( M-1 ) = F
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
$ D( M ) )
G = E( M-1 )
DO 150 I = M, LL + 1, -1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.LT.M )
$ E( I ) = R
F = COSR*D( I ) + SINR*E( I-1 )
E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
G = SINR*D( I-1 )
D( I-1 ) = COSR*D( I-1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I-1 ) + SINL*D( I-1 )
D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
IF( I.GT.LL+1 ) THEN
G = SINL*E( I-2 )
E( I-2 ) = COSL*E( I-2 )
END IF
WORK( I-LL ) = COSR
WORK( I-LL+NM1 ) = -SINR
WORK( I-LL+NM12 ) = COSL
WORK( I-LL+NM13 ) = -SINL
150 CONTINUE
E( LL ) = F
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
*
* Update singular vectors if desired
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
END IF
END IF
*
* QR iteration finished, go back and check convergence
*
GO TO 60
*
* All singular values converged, so make them positive
*
160 CONTINUE
DO 170 I = 1, N
IF( D( I ).LT.ZERO ) THEN
D( I ) = -D( I )
*
* Change sign of singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
END IF
170 CONTINUE
*
* Sort the singular values into decreasing order (insertion sort on
* singular values, but only one transposition per singular vector)
*
DO 190 I = 1, N - 1
*
* Scan for smallest D(I)
*
ISUB = 1
SMIN = D( 1 )
DO 180 J = 2, N + 1 - I
IF( D( J ).LE.SMIN ) THEN
ISUB = J
SMIN = D( J )
END IF
180 CONTINUE
IF( ISUB.NE.N+1-I ) THEN
*
* Swap singular values and vectors
*
D( ISUB ) = D( N+1-I )
D( N+1-I ) = SMIN
IF( NCVT.GT.0 )
$ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
$ LDVT )
IF( NRU.GT.0 )
$ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
IF( NCC.GT.0 )
$ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
END IF
190 CONTINUE
GO TO 220
*
* Maximum number of iterations exceeded, failure to converge
*
200 CONTINUE
INFO = 0
DO 210 I = 1, N - 1
IF( E( I ).NE.ZERO )
$ INFO = INFO + 1
210 CONTINUE
220 CONTINUE
RETURN
*
* End of DBDSQR
*
END

14
lib/linalg/dcabs1.cpp Normal file
View File

@ -0,0 +1,14 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal dcabs1_(doublecomplex *z__)
{
doublereal ret_val, d__1, d__2;
double d_lmp_imag(doublecomplex *);
ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_lmp_imag(z__), abs(d__2));
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,66 +0,0 @@
*> \brief \b DCABS1
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DCABS1(Z)
*
* .. Scalar Arguments ..
* COMPLEX*16 Z
* ..
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] Z
*> \verbatim
*> Z is COMPLEX*16
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
* =====================================================================
DOUBLE PRECISION FUNCTION DCABS1(Z)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
COMPLEX*16 Z
* ..
* ..
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC ABS,DBLE,DIMAG
*
DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
RETURN
*
* End of DCABS1
*
END

56
lib/linalg/dcopy.cpp Normal file
View File

@ -0,0 +1,56 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
integer i__, m, ix, iy, mp1;
--dy;
--dx;
if (*n <= 0) {
return 0;
}
if (*incx == 1 && *incy == 1) {
m = *n % 7;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[i__] = dx[i__];
}
if (*n < 7) {
return 0;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 7) {
dy[i__] = dx[i__];
dy[i__ + 1] = dx[i__ + 1];
dy[i__ + 2] = dx[i__ + 2];
dy[i__ + 3] = dx[i__ + 3];
dy[i__ + 4] = dx[i__ + 4];
dy[i__ + 5] = dx[i__ + 5];
dy[i__ + 6] = dx[i__ + 6];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[iy] = dx[ix];
ix += *incx;
iy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,146 +0,0 @@
*> \brief \b DCOPY
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DCOPY copies a vector, x, to a vector, y.
*> uses unrolled loops for increments equal to 1.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[out] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
IF (N.LE.0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,7)
IF (M.NE.0) THEN
DO I = 1,M
DY(I) = DX(I)
END DO
IF (N.LT.7) RETURN
END IF
MP1 = M + 1
DO I = MP1,N,7
DY(I) = DX(I)
DY(I+1) = DX(I+1)
DY(I+2) = DX(I+2)
DY(I+3) = DX(I+3)
DY(I+4) = DX(I+4)
DY(I+5) = DX(I+5)
DY(I+6) = DX(I+6)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DY(IY) = DX(IX)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
RETURN
*
* End of DCOPY
*
END

58
lib/linalg/ddot.cpp Normal file
View File

@ -0,0 +1,58 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
doublereal ret_val;
integer i__, m, ix, iy, mp1;
doublereal dtemp;
--dy;
--dx;
ret_val = 0.;
dtemp = 0.;
if (*n <= 0) {
return ret_val;
}
if (*incx == 1 && *incy == 1) {
m = *n % 5;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += dx[i__] * dy[i__];
}
if (*n < 5) {
ret_val = dtemp;
return ret_val;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 5) {
dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] +
dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] +
dx[i__ + 4] * dy[i__ + 4];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += dx[ix] * dy[iy];
ix += *incx;
iy += *incy;
}
}
ret_val = dtemp;
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,148 +0,0 @@
*> \brief \b DDOT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DDOT forms the dot product of two vectors.
*> uses unrolled loops for increments equal to one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[in] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
DOUBLE PRECISION DTEMP
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
DDOT = 0.0d0
DTEMP = 0.0d0
IF (N.LE.0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,5)
IF (M.NE.0) THEN
DO I = 1,M
DTEMP = DTEMP + DX(I)*DY(I)
END DO
IF (N.LT.5) THEN
DDOT=DTEMP
RETURN
END IF
END IF
MP1 = M + 1
DO I = MP1,N,5
DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
$ DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DTEMP = DTEMP + DX(IX)*DY(IY)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
DDOT = DTEMP
RETURN
*
* End of DDOT
*
END

105
lib/linalg/dgebd2.cpp Normal file
View File

@ -0,0 +1,105 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgebd2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
doublereal *tauq, doublereal *taup, doublereal *work, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info < 0) {
i__1 = -(*info);
xerbla_((char *)"DGEBD2", &i__1, (ftnlen)6);
return 0;
}
if (*m >= *n) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
if (i__ < *n) {
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[i__],
&a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
}
a[i__ + i__ * a_dim1] = d__[i__];
if (i__ < *n) {
i__2 = *n - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
e[i__] = a[i__ + (i__ + 1) * a_dim1];
a[i__ + (i__ + 1) * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__],
&a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)5);
a[i__ + (i__ + 1) * a_dim1] = e[i__];
} else {
taup[i__] = 0.;
}
}
} else {
i__1 = *m;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
if (i__ < *m) {
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[i__],
&a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
}
a[i__ + i__ * a_dim1] = d__[i__];
if (i__ < *m) {
i__2 = *m - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
e[i__] = a[i__ + 1 + i__ * a_dim1];
a[i__ + 1 + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tauq[i__],
&a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
a[i__ + 1 + i__ * a_dim1] = e[i__];
} else {
tauq[i__] = 0.;
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,317 +0,0 @@
*> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEBD2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEBD2 reduces a real general m by n matrix A to upper or lower
*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*>
*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n general matrix to be reduced.
*> On exit,
*> if m >= n, the diagonal and the first superdiagonal are
*> overwritten with the upper bidiagonal matrix B; the
*> elements below the diagonal, with the array TAUQ, represent
*> the orthogonal matrix Q as a product of elementary
*> reflectors, and the elements above the first superdiagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors;
*> if m < n, the diagonal and the first subdiagonal are
*> overwritten with the lower bidiagonal matrix B; the
*> elements below the first subdiagonal, with the array TAUQ,
*> represent the orthogonal matrix Q as a product of
*> elementary reflectors, and the elements above the diagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (min(M,N))
*> The diagonal elements of the bidiagonal matrix B:
*> D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (min(M,N)-1)
*> The off-diagonal elements of the bidiagonal matrix B:
*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (max(M,N))
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> If m >= n,
*>
*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n,
*>
*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The contents of A on exit are illustrated by the following examples:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
*> ( v1 v2 v3 v4 v5 )
*>
*> where d and e denote diagonal and off-diagonal elements of B, vi
*> denotes an element of the vector defining H(i), and ui an element of
*> the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBD2', -INFO )
RETURN
END IF
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, N
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
IF( I.LT.N )
$ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.N ) THEN
*
* Generate elementary reflector G(i) to annihilate
* A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Apply G(i) to A(i+1:m,i+1:n) from the right
*
CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
$ TAUP( I ), A( I+1, I+1 ), LDA, WORK )
A( I, I+1 ) = E( I )
ELSE
TAUP( I ) = ZERO
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, M
*
* Generate elementary reflector G(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply G(i) to A(i+1:m,i:n) from the right
*
IF( I.LT.M )
$ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
$ TAUP( I ), A( I+1, I ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.M ) THEN
*
* Generate elementary reflector H(i) to annihilate
* A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Apply H(i) to A(i+1:m,i+1:n) from the left
*
CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
$ A( I+1, I+1 ), LDA, WORK )
A( I+1, I ) = E( I )
ELSE
TAUQ( I ) = ZERO
END IF
20 CONTINUE
END IF
RETURN
*
* End of DGEBD2
*
END

129
lib/linalg/dgebrd.cpp Normal file
View File

@ -0,0 +1,129 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
static doublereal c_b21 = -1.;
static doublereal c_b22 = 1.;
int dgebrd_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
doublereal *tauq, doublereal *taup, doublereal *work, integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, j, nb, nx, ws;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer nbmin, iinfo, minmn;
extern int dgebd2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *),
dlabrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *,
integer *),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwrkx, ldwrky, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
--work;
*info = 0;
i__1 = 1, i__2 = ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nb = max(i__1, i__2);
lwkopt = (*m + *n) * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else {
i__1 = max(1, *m);
if (*lwork < max(i__1, *n) && !lquery) {
*info = -10;
}
}
if (*info < 0) {
i__1 = -(*info);
xerbla_((char *)"DGEBRD", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
minmn = min(*m, *n);
if (minmn == 0) {
work[1] = 1.;
return 0;
}
ws = max(*m, *n);
ldwrkx = *m;
ldwrky = *n;
if (nb > 1 && nb < minmn) {
i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < minmn) {
ws = (*m + *n) * nb;
if (*lwork < ws) {
nbmin = ilaenv_(&c__2, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
if (*lwork >= (*m + *n) * nbmin) {
nb = *lwork / (*m + *n);
} else {
nb = 1;
nx = minmn;
}
}
}
} else {
nx = minmn;
}
i__1 = minmn - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *m - i__ + 1;
i__4 = *n - i__ + 1;
dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__],
&taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky);
i__3 = *m - i__ - nb + 1;
i__4 = *n - i__ - nb + 1;
dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + nb + i__ * a_dim1],
lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &c_b22,
&a[i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)12, (ftnlen)9);
i__3 = *m - i__ - nb + 1;
i__4 = *n - i__ - nb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &nb, &c_b21, &work[nb + 1], &ldwrkx,
&a[i__ + (i__ + nb) * a_dim1], lda, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda,
(ftnlen)12, (ftnlen)12);
if (*m >= *n) {
i__3 = i__ + nb - 1;
for (j = i__; j <= i__3; ++j) {
a[j + j * a_dim1] = d__[j];
a[j + (j + 1) * a_dim1] = e[j];
}
} else {
i__3 = i__ + nb - 1;
for (j = i__; j <= i__3; ++j) {
a[j + j * a_dim1] = d__[j];
a[j + 1 + j * a_dim1] = e[j];
}
}
}
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__], &taup[i__],
&work[1], &iinfo);
work[1] = (doublereal)ws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,349 +0,0 @@
*> \brief \b DGEBRD
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEBRD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
* INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEBRD reduces a general real M-by-N matrix A to upper or lower
*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*>
*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N general matrix to be reduced.
*> On exit,
*> if m >= n, the diagonal and the first superdiagonal are
*> overwritten with the upper bidiagonal matrix B; the
*> elements below the diagonal, with the array TAUQ, represent
*> the orthogonal matrix Q as a product of elementary
*> reflectors, and the elements above the first superdiagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors;
*> if m < n, the diagonal and the first subdiagonal are
*> overwritten with the lower bidiagonal matrix B; the
*> elements below the first subdiagonal, with the array TAUQ,
*> represent the orthogonal matrix Q as a product of
*> elementary reflectors, and the elements above the diagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (min(M,N))
*> The diagonal elements of the bidiagonal matrix B:
*> D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (min(M,N)-1)
*> The off-diagonal elements of the bidiagonal matrix B:
*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,M,N).
*> For optimum performance LWORK >= (M+N)*NB, where NB
*> is the optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> If m >= n,
*>
*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n,
*>
*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The contents of A on exit are illustrated by the following examples:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
*> ( v1 v2 v3 v4 v5 )
*>
*> where d and e denote diagonal and off-diagonal elements of B, vi
*> denotes an element of the vector defining H(i), and ui an element of
*> the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
$ NBMIN, NX, WS
* ..
* .. External Subroutines ..
EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
LWKOPT = ( M+N )*NB
WORK( 1 ) = DBLE( LWKOPT )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBRD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
MINMN = MIN( M, N )
IF( MINMN.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
WS = MAX( M, N )
LDWRKX = M
LDWRKY = N
*
IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
*
* Set the crossover point NX.
*
NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
*
* Determine when to switch from blocked to unblocked code.
*
IF( NX.LT.MINMN ) THEN
WS = ( M+N )*NB
IF( LWORK.LT.WS ) THEN
*
* Not enough work space for the optimal NB, consider using
* a smaller block size.
*
NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
IF( LWORK.GE.( M+N )*NBMIN ) THEN
NB = LWORK / ( M+N )
ELSE
NB = 1
NX = MINMN
END IF
END IF
END IF
ELSE
NX = MINMN
END IF
*
DO 30 I = 1, MINMN - NX, NB
*
* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
* the matrices X and Y which are needed to update the unreduced
* part of the matrix
*
CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, LDWRKX,
$ WORK( LDWRKX*NB+1 ), LDWRKY )
*
* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
* of the form A := A - V*Y**T - X*U**T
*
CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, A( I+NB, I ), LDA,
$ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
$ A( I+NB, I+NB ), LDA )
CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
$ ONE, A( I+NB, I+NB ), LDA )
*
* Copy diagonal and off-diagonal elements of B back into A
*
IF( M.GE.N ) THEN
DO 10 J = I, I + NB - 1
A( J, J ) = D( J )
A( J, J+1 ) = E( J )
10 CONTINUE
ELSE
DO 20 J = I, I + NB - 1
A( J, J ) = D( J )
A( J+1, J ) = E( J )
20 CONTINUE
END IF
30 CONTINUE
*
* Use unblocked code to reduce the remainder of the matrix
*
CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, IINFO )
WORK( 1 ) = WS
RETURN
*
* End of DGEBRD
*
END

101
lib/linalg/dgecon.cpp Normal file
View File

@ -0,0 +1,101 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgecon_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *anorm,
doublereal *rcond, doublereal *work, integer *iwork, integer *info, ftnlen norm_len)
{
integer a_dim1, a_offset, i__1;
doublereal d__1;
doublereal sl;
integer ix;
doublereal su;
integer kase, kase1;
doublereal scale;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
integer isave[3];
extern int drscl_(integer *, doublereal *, doublereal *, integer *),
dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen);
doublereal ainvnm;
extern int dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen);
logical onenrm;
char normin[1];
doublereal smlnum;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--work;
--iwork;
*info = 0;
onenrm = *(unsigned char *)norm == '1' || lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1);
if (!onenrm && !lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
} else if (*anorm < 0.) {
*info = -5;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGECON", &i__1, (ftnlen)6);
return 0;
}
*rcond = 0.;
if (*n == 0) {
*rcond = 1.;
return 0;
} else if (*anorm == 0.) {
return 0;
}
smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12);
ainvnm = 0.;
*(unsigned char *)normin = 'N';
if (onenrm) {
kase1 = 1;
} else {
kase1 = 2;
}
kase = 0;
L10:
dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
if (kase != 0) {
if (kase == kase1) {
dlatrs_((char *)"Lower", (char *)"No transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
&work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)4, (ftnlen)1);
dlatrs_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1],
&su, &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)8, (ftnlen)1);
} else {
dlatrs_((char *)"Upper", (char *)"Transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1], &su,
&work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)8, (ftnlen)1);
dlatrs_((char *)"Lower", (char *)"Transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
&work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)4, (ftnlen)1);
}
scale = sl * su;
*(unsigned char *)normin = 'Y';
if (scale != 1.) {
ix = idamax_(n, &work[1], &c__1);
if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) {
goto L20;
}
drscl_(n, &scale, &work[1], &c__1);
}
goto L10;
}
if (ainvnm != 0.) {
*rcond = 1. / ainvnm / *anorm;
}
L20:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,258 +0,0 @@
*> \brief \b DGECON
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGECON + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
* INFO )
*
* .. Scalar Arguments ..
* CHARACTER NORM
* INTEGER INFO, LDA, N
* DOUBLE PRECISION ANORM, RCOND
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGECON estimates the reciprocal of the condition number of a general
*> real matrix A, in either the 1-norm or the infinity-norm, using
*> the LU factorization computed by DGETRF.
*>
*> An estimate is obtained for norm(inv(A)), and the reciprocal of the
*> condition number is computed as
*> RCOND = 1 / ( norm(A) * norm(inv(A)) ).
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] NORM
*> \verbatim
*> NORM is CHARACTER*1
*> Specifies whether the 1-norm condition number or the
*> infinity-norm condition number is required:
*> = '1' or 'O': 1-norm;
*> = 'I': Infinity-norm.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The factors L and U from the factorization A = P*L*U
*> as computed by DGETRF.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] ANORM
*> \verbatim
*> ANORM is DOUBLE PRECISION
*> If NORM = '1' or 'O', the 1-norm of the original matrix A.
*> If NORM = 'I', the infinity-norm of the original matrix A.
*> \endverbatim
*>
*> \param[out] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> The reciprocal of the condition number of the matrix A,
*> computed as RCOND = 1/(norm(A) * norm(inv(A))).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*N)
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER NORM
INTEGER INFO, LDA, N
DOUBLE PRECISION ANORM, RCOND
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL ONENRM
CHARACTER NORMIN
INTEGER IX, KASE, KASE1
DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU
* ..
* .. Local Arrays ..
INTEGER ISAVE( 3 )
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER IDAMAX
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, IDAMAX, DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( ANORM.LT.ZERO ) THEN
INFO = -5
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGECON', -INFO )
RETURN
END IF
*
* Quick return if possible
*
RCOND = ZERO
IF( N.EQ.0 ) THEN
RCOND = ONE
RETURN
ELSE IF( ANORM.EQ.ZERO ) THEN
RETURN
END IF
*
SMLNUM = DLAMCH( 'Safe minimum' )
*
* Estimate the norm of inv(A).
*
AINVNM = ZERO
NORMIN = 'N'
IF( ONENRM ) THEN
KASE1 = 1
ELSE
KASE1 = 2
END IF
KASE = 0
10 CONTINUE
CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE )
IF( KASE.NE.0 ) THEN
IF( KASE.EQ.KASE1 ) THEN
*
* Multiply by inv(L).
*
CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
$ LDA, WORK, SL, WORK( 2*N+1 ), INFO )
*
* Multiply by inv(U).
*
CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
$ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
ELSE
*
* Multiply by inv(U**T).
*
CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
$ LDA, WORK, SU, WORK( 3*N+1 ), INFO )
*
* Multiply by inv(L**T).
*
CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
$ LDA, WORK, SL, WORK( 2*N+1 ), INFO )
END IF
*
* Divide X by 1/(SL*SU) if doing so will not cause overflow.
*
SCALE = SL*SU
NORMIN = 'Y'
IF( SCALE.NE.ONE ) THEN
IX = IDAMAX( N, WORK, 1 )
IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
$ GO TO 20
CALL DRSCL( N, SCALE, WORK, 1 )
END IF
GO TO 10
END IF
*
* Compute the estimate of the reciprocal condition number.
*
IF( AINVNM.NE.ZERO )
$ RCOND = ( ONE / AINVNM ) / ANORM
*
20 CONTINUE
RETURN
*
* End of DGECON
*
END

53
lib/linalg/dgelq2.cpp Normal file
View File

@ -0,0 +1,53 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgelq2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, k;
doublereal aii;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELQ2", &i__1, (ftnlen)6);
return 0;
}
k = min(*m, *n);
i__1 = k;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda, &tau[i__]);
if (i__ < *m) {
aii = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
a[i__ + i__ * a_dim1] = aii;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,197 +0,0 @@
*> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELQ2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELQ2 computes an LQ factorization of a real m-by-n matrix A:
*>
*> A = ( L 0 ) * Q
*>
*> where:
*>
*> Q is a n-by-n orthogonal matrix;
*> L is a lower-triangular m-by-m matrix;
*> 0 is a m-by-(n-m) zero matrix, if m < n.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix A.
*> On exit, the elements on and below the diagonal of the array
*> contain the m by min(m,n) lower trapezoidal matrix L (L is
*> lower triangular if m <= n); the elements above the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (M)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(k) . . . H(2) H(1), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQ2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAU( I ) )
IF( I.LT.M ) THEN
*
* Apply H(i) to A(i+1:m,i:n) from the right
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
$ A( I+1, I ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGELQ2
*
END

106
lib/linalg/dgelqf.cpp Normal file
View File

@ -0,0 +1,106 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
int dgelqf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
extern int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *),
dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen, ftnlen, ftnlen, ftnlen),
dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lwkopt = *m * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else if (*lwork < max(1, *m) && !lquery) {
*info = -7;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELQF", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
k = min(*m, *n);
if (k == 0) {
work[1] = 1.;
return 0;
}
nbmin = 2;
nx = 0;
iws = *m;
if (nb > 1 && nb < k) {
i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < k) {
ldwork = *m;
iws = ldwork * nb;
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
}
}
if (nb >= nbmin && nb < k && nx < k) {
i__1 = k - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = k - i__ + 1;
ib = min(i__3, nb);
i__3 = *n - i__ + 1;
dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
if (i__ + ib <= *m) {
i__3 = *n - i__ + 1;
dlarft_((char *)"Forward", (char *)"Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&work[1], &ldwork, (ftnlen)7, (ftnlen)7);
i__3 = *m - i__ - ib + 1;
i__4 = *n - i__ + 1;
dlarfb_((char *)"Right", (char *)"No transpose", (char *)"Forward", (char *)"Rowwise", &i__3, &i__4, &ib,
&a[i__ + i__ * a_dim1], lda, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1],
lda, &work[ib + 1], &ldwork, (ftnlen)5, (ftnlen)12, (ftnlen)7, (ftnlen)7);
}
}
} else {
i__ = 1;
}
if (i__ <= k) {
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,274 +0,0 @@
*> \brief \b DGELQF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELQF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELQF computes an LQ factorization of a real M-by-N matrix A:
*>
*> A = ( L 0 ) * Q
*>
*> where:
*>
*> Q is a N-by-N orthogonal matrix;
*> L is a lower-triangular M-by-M matrix;
*> 0 is a M-by-(N-M) zero matrix, if M < N.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the elements on and below the diagonal of the array
*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is
*> lower triangular if m <= n); the elements above the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= max(1,M).
*> For optimum performance LWORK >= M*NB, where NB is the
*> optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(k) . . . H(2) H(1), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
LWKOPT = M*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
K = MIN( M, N )
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = M
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = M
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the LQ factorization of the current block
* A(i:i+ib-1,i:n)
*
CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.M ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
$ LDA, TAU( I ), WORK, LDWORK )
*
* Apply H to A(i+ib:m,i:n) from the right
*
CALL DLARFB( 'Right', 'No transpose', 'Forward',
$ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
$ LDA, WORK, LDWORK, A( I+IB, I ), LDA,
$ WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGELQF
*
END

341
lib/linalg/dgelsd.cpp Normal file
View File

@ -0,0 +1,341 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__6 = 6;
static integer c_n1 = -1;
static integer c__9 = 9;
static integer c__0 = 0;
static integer c__1 = 1;
static doublereal c_b82 = 0.;
int dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
integer *lwork, integer *iwork, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
double log(doublereal);
integer ie, il, mm;
doublereal eps, anrm, bnrm;
integer itau, nlvl, iascl, ibscl;
doublereal sfmin;
integer minmn, maxmn, itaup, itauq, mnthr, nwork;
extern int dlabad_(doublereal *, doublereal *),
dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *, integer *);
extern doublereal dlamch_(char *, ftnlen),
dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlalsd_(char *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen),
dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
doublereal *, integer *, integer *, ftnlen),
dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
doublereal bignum;
extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *, ftnlen, ftnlen, ftnlen);
integer wlalsd;
extern int dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork;
extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer liwork, minwrk, maxwrk;
doublereal smlnum;
logical lquery;
integer smlsiz;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
--s;
--work;
--iwork;
*info = 0;
minmn = min(*m, *n);
maxmn = max(*m, *n);
mnthr = ilaenv_(&c__6, (char *)"DGELSD", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *m)) {
*info = -5;
} else if (*ldb < max(1, maxmn)) {
*info = -7;
}
smlsiz = ilaenv_(&c__9, (char *)"DGELSD", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
minwrk = 1;
liwork = 1;
minmn = max(1, minmn);
i__1 = (integer)(log((doublereal)minmn / (doublereal)(smlsiz + 1)) / log(2.)) + 1;
nlvl = max(i__1, 0);
if (*info == 0) {
maxwrk = 0;
liwork = minmn * 3 * nlvl + minmn * 11;
mm = *m;
if (*m >= *n && *m >= mnthr) {
mm = *n;
i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, (char *)"DORMQR", (char *)"LT", m, nrhs, n, &c_n1,
(ftnlen)6, (ftnlen)2);
maxwrk = max(i__1, i__2);
}
if (*m >= *n) {
i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", &mm, n, &c_n1,
&c_n1, (ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", &mm, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = smlsiz + 1;
wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *nrhs + i__1 * i__1;
i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
maxwrk = max(i__1, i__2);
i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2),
i__2 = *n * 3 + wlalsd;
minwrk = max(i__1, i__2);
}
if (*n > *m) {
i__1 = smlsiz + 1;
wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *nrhs + i__1 * i__1;
if (*n >= mnthr) {
maxwrk = *m + *m * ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6,
(ftnlen)1);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
(*m << 1) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, m, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
*nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
(*m - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", m, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
if (*nrhs > 1) {
i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
maxwrk = max(i__1, i__2);
} else {
i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
maxwrk = max(i__1, i__2);
}
i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, (char *)"DORMLQ", (char *)"LT", n, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)2);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
maxwrk = max(i__1, i__2);
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3, i__4);
maxwrk = max(i__1, i__2);
} else {
maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, m,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
maxwrk = max(i__1, i__2);
}
i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1, i__2),
i__2 = *m * 3 + wlalsd;
minwrk = max(i__1, i__2);
}
minwrk = min(minwrk, maxwrk);
work[1] = (doublereal)maxwrk;
iwork[1] = liwork;
if (*lwork < minwrk && !lquery) {
*info = -12;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELSD", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
goto L10;
}
if (*m == 0 || *n == 0) {
*rank = 0;
return 0;
}
eps = dlamch_((char *)"P", (ftnlen)1);
sfmin = dlamch_((char *)"S", (ftnlen)1);
smlnum = sfmin / eps;
bignum = 1. / smlnum;
dlabad_(&smlnum, &bignum);
anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
iascl = 0;
if (anrm > 0. && anrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 1;
} else if (anrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 2;
} else if (anrm == 0.) {
i__1 = max(*m, *n);
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb, (ftnlen)1);
dlaset_((char *)"F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1, (ftnlen)1);
*rank = 0;
goto L10;
}
bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
ibscl = 0;
if (bnrm > 0. && bnrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 1;
} else if (bnrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 2;
}
if (*m < *n) {
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
}
if (*m >= *n) {
mm = *m;
if (*m >= mnthr) {
mm = *n;
itau = 1;
nwork = itau + *n;
i__1 = *lwork - nwork + 1;
dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
if (*n > 1) {
i__1 = *n - 1;
i__2 = *n - 1;
dlaset_((char *)"L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], lda, (ftnlen)1);
}
}
ie = 1;
itauq = ie + *n;
itaup = itauq + *n;
nwork = itaup + *n;
i__1 = *lwork - nwork + 1;
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
} else {
i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1, i__2), i__1 = max(i__1, *nrhs),
i__2 = *n - *m * 3, i__1 = max(i__1, i__2);
if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1, wlalsd)) {
ldwork = *m;
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__1 = (*m << 2) + *m * *lda + max(i__3, i__4), i__2 = *m * *lda + *m + *m * *nrhs,
i__1 = max(i__1, i__2), i__2 = (*m << 2) + *m * *lda + wlalsd;
if (*lwork >= max(i__1, i__2)) {
ldwork = *lda;
}
itau = 1;
nwork = *m + 1;
i__1 = *lwork - nwork + 1;
dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
il = nwork;
dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
i__1 = *m - 1;
i__2 = *m - 1;
dlaset_((char *)"U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &ldwork, (ftnlen)1);
ie = il + ldwork * *m;
itauq = ie + *m;
itaup = itauq + *m;
nwork = itaup + *m;
i__1 = *lwork - nwork + 1;
dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", m, nrhs, m, &work[il], &ldwork, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
nwork = itau + *m;
i__1 = *lwork - nwork + 1;
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
} else {
ie = 1;
itauq = ie + *m;
itaup = itauq + *m;
nwork = itaup + *m;
i__1 = *lwork - nwork + 1;
dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, m, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
}
if (iascl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
} else if (iascl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
}
if (ibscl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
} else if (ibscl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
}
L10:
work[1] = (doublereal)maxwrk;
iwork[1] = liwork;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,626 +0,0 @@
*> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELSD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
* WORK, LWORK, IWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
* DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELSD computes the minimum-norm solution to a real linear least
*> squares problem:
*> minimize 2-norm(| b - A*x |)
*> using the singular value decomposition (SVD) of A. A is an M-by-N
*> matrix which may be rank-deficient.
*>
*> Several right hand side vectors b and solution vectors x can be
*> handled in a single call; they are stored as the columns of the
*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
*> matrix X.
*>
*> The problem is solved in three steps:
*> (1) Reduce the coefficient matrix A to bidiagonal form with
*> Householder transformations, reducing the original problem
*> into a "bidiagonal least squares problem" (BLS)
*> (2) Solve the BLS using a divide and conquer approach.
*> (3) Apply back all the Householder transformations to solve
*> the original least squares problem.
*>
*> The effective rank of A is determined by treating as zero those
*> singular values which are less than RCOND times the largest singular
*> value.
*>
*> The divide and conquer algorithm makes very mild assumptions about
*> floating point arithmetic. It will work on machines with a guard
*> digit in add/subtract, or on those binary machines without guard
*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
*> without guard digits, but we know of none.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrices B and X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, A has been destroyed.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the M-by-NRHS right hand side matrix B.
*> On exit, B is overwritten by the N-by-NRHS solution
*> matrix X. If m >= n and RANK = n, the residual
*> sum-of-squares for the solution in the i-th column is given
*> by the sum of squares of elements n+1:m in that column.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,max(M,N)).
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (min(M,N))
*> The singular values of A in decreasing order.
*> The condition number of A in the 2-norm = S(1)/S(min(m,n)).
*> \endverbatim
*>
*> \param[in] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> RCOND is used to determine the effective rank of A.
*> Singular values S(i) <= RCOND*S(1) are treated as zero.
*> If RCOND < 0, machine precision is used instead.
*> \endverbatim
*>
*> \param[out] RANK
*> \verbatim
*> RANK is INTEGER
*> The effective rank of A, i.e., the number of singular values
*> which are greater than RCOND*S(1).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK must be at least 1.
*> The exact minimum amount of workspace needed depends on M,
*> N and NRHS. As long as LWORK is at least
*> 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
*> if M is greater than or equal to N or
*> 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
*> if M is less than N, the code will execute correctly.
*> SMLSIZ is returned by ILAENV and is equal to the maximum
*> size of the subproblems at the bottom of the computation
*> tree (usually about 25), and
*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
*> LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
*> where MINMN = MIN( M,N ).
*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: the algorithm for computing the SVD failed to converge;
*> if INFO = i, i off-diagonal elements of an intermediate
*> bidiagonal form did not converge to zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
*> \par Contributors:
* ==================
*>
*> Ming Gu and Ren-Cang Li, Computer Science Division, University of
*> California at Berkeley, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*
* =====================================================================
SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
$ WORK, LWORK, IWORK, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
$ LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
$ MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
* ..
* .. External Subroutines ..
EXTERNAL DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
$ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL ILAENV, DLAMCH, DLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, INT, LOG, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments.
*
INFO = 0
MINMN = MIN( M, N )
MAXMN = MAX( M, N )
MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
INFO = -7
END IF
*
SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
*
* Compute workspace.
* (Note: Comments in the code beginning "Workspace:" describe the
* minimal amount of workspace needed at that point in the code,
* as well as the preferred amount for good performance.
* NB refers to the optimal block size for the immediately
* following subroutine, as returned by ILAENV.)
*
MINWRK = 1
LIWORK = 1
MINMN = MAX( 1, MINMN )
NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
$ LOG( TWO ) ) + 1, 0 )
*
IF( INFO.EQ.0 ) THEN
MAXWRK = 0
LIWORK = 3*MINMN*NLVL + 11*MINMN
MM = M
IF( M.GE.N .AND. M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns.
*
MM = N
MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
$ -1, -1 ) )
MAXWRK = MAX( MAXWRK, N+NRHS*
$ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
END IF
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined.
*
MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
$ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
MAXWRK = MAX( MAXWRK, 3*N+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
$ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
END IF
IF( N.GT.M ) THEN
WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
IF( N.GE.MNTHR ) THEN
*
* Path 2a - underdetermined, with many more columns
* than rows.
*
MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
$ ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
IF( NRHS.GT.1 ) THEN
MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
ELSE
MAXWRK = MAX( MAXWRK, M*M+2*M )
END IF
MAXWRK = MAX( MAXWRK, M+NRHS*
$ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
! XXX: Ensure the Path 2a case below is triggered. The workspace
! calculation should use queries for all routines eventually.
MAXWRK = MAX( MAXWRK,
$ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
ELSE
*
* Path 2 - remaining underdetermined cases.
*
MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
$ -1, -1 )
MAXWRK = MAX( MAXWRK, 3*M+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
MAXWRK = MAX( MAXWRK, 3*M+M*
$ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
END IF
MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
END IF
MINWRK = MIN( MINWRK, MAXWRK )
WORK( 1 ) = MAXWRK
IWORK( 1 ) = LIWORK
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
INFO = -12
END IF
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELSD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
GO TO 10
END IF
*
* Quick return if possible.
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RANK = 0
RETURN
END IF
*
* Get machine parameters.
*
EPS = DLAMCH( 'P' )
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max entry outside range [SMLNUM,BIGNUM].
*
ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
IASCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM.
*
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
IASCL = 1
ELSE IF( ANRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM.
*
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
IASCL = 2
ELSE IF( ANRM.EQ.ZERO ) THEN
*
* Matrix all zero. Return zero solution.
*
CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
RANK = 0
GO TO 10
END IF
*
* Scale B if max entry outside range [SMLNUM,BIGNUM].
*
BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
IBSCL = 0
IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM.
*
CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
IBSCL = 1
ELSE IF( BNRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM.
*
CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
IBSCL = 2
END IF
*
* If M < N make sure certain entries of B are zero.
*
IF( M.LT.N )
$ CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
*
* Overdetermined case.
*
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined.
*
MM = M
IF( M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns.
*
MM = N
ITAU = 1
NWORK = ITAU + N
*
* Compute A=Q*R.
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Multiply B by transpose(Q).
* (Workspace: need N+NRHS, prefer N+NRHS*NB)
*
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Zero out below R.
*
IF( N.GT.1 ) THEN
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
END IF
END IF
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
* Bidiagonalize R in A.
* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
*
CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of R.
* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of R.
*
CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
$ MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
*
* Path 2a - underdetermined, with many more columns than rows
* and sufficient workspace for an efficient algorithm.
*
LDWORK = M
IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
$ M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
ITAU = 1
NWORK = M + 1
*
* Compute A=L*Q.
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
IL = NWORK
*
* Copy L to WORK(IL), zeroing out above its diagonal.
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
$ LDWORK )
IE = IL + LDWORK*M
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IL).
* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of L.
* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUQ ), B, LDB, WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of L.
*
CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUP ), B, LDB, WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Zero out below first M rows of B.
*
CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
NWORK = ITAU + M
*
* Multiply transpose(Q) by B.
* (Workspace: need M+NRHS, prefer M+NRHS*NB)
*
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
ELSE
*
* Path 2 - remaining underdetermined cases.
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
* Bidiagonalize A.
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors.
* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of A.
*
CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
END IF
*
* Undo scaling.
*
IF( IASCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
ELSE IF( IASCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
END IF
IF( IBSCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
ELSE IF( IBSCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
END IF
*
10 CONTINUE
WORK( 1 ) = MAXWRK
IWORK( 1 ) = LIWORK
RETURN
*
* End of DGELSD
*
END

466
lib/linalg/dgelss.cpp Normal file
View File

@ -0,0 +1,466 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__6 = 6;
static integer c_n1 = -1;
static integer c__0 = 0;
static doublereal c_b46 = 0.;
static integer c__1 = 1;
static doublereal c_b79 = 1.;
int dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
doublereal d__1;
integer i__, bl, ie, il, mm;
doublereal dum[1], eps, thr, anrm, bnrm;
integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, lwork_dorgbr__, lwork_dormbr__,
lwork_dormlq__, lwork_dormqr__;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iascl, ibscl;
extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen),
drscl_(integer *, doublereal *, doublereal *, integer *);
integer chunk;
doublereal sfmin;
integer minmn;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer maxmn, itaup, itauq, mnthr, iwork;
extern int dlabad_(doublereal *, doublereal *),
dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *, integer *);
extern doublereal dlamch_(char *, ftnlen),
dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
integer bdspac;
extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
doublereal *, integer *, integer *, ftnlen),
dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen),
dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *,
doublereal *, integer *, doublereal *, integer *, doublereal *, integer *,
doublereal *, integer *, ftnlen),
dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, integer *, ftnlen);
doublereal bignum;
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *, ftnlen, ftnlen, ftnlen),
dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen,
ftnlen);
integer ldwork;
extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer minwrk, maxwrk;
doublereal smlnum;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
--s;
--work;
*info = 0;
minmn = min(*m, *n);
maxmn = max(*m, *n);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *m)) {
*info = -5;
} else if (*ldb < max(1, maxmn)) {
*info = -7;
}
if (*info == 0) {
minwrk = 1;
maxwrk = 1;
if (minmn > 0) {
mm = *m;
mnthr = ilaenv_(&c__6, (char *)"DGELSS", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
if (*m >= *n && *m >= mnthr) {
dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
lwork_dgeqrf__ = (integer)dum[0];
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum, &c_n1,
info, (ftnlen)1, (ftnlen)1);
lwork_dormqr__ = (integer)dum[0];
mm = *n;
i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n + lwork_dormqr__;
maxwrk = max(i__1, i__2);
}
if (*m >= *n) {
i__1 = 1, i__2 = *n * 5;
bdspac = max(i__1, i__2);
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
&c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
maxwrk = max(maxwrk, bdspac);
i__1 = maxwrk, i__2 = *n * *nrhs;
maxwrk = max(i__1, i__2);
i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2);
minwrk = max(i__1, bdspac);
maxwrk = max(minwrk, maxwrk);
}
if (*n > *m) {
i__1 = 1, i__2 = *m * 5;
bdspac = max(i__1, i__2);
i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, i__2);
minwrk = max(i__1, bdspac);
if (*n >= mnthr) {
dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
lwork_dgelqf__ = (integer)dum[0];
dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb,
dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
&c_n1, info, (ftnlen)1, (ftnlen)1);
lwork_dormlq__ = (integer)dum[0];
maxwrk = *m + lwork_dgelqf__;
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dgebrd__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
maxwrk = max(i__1, i__2);
if (*nrhs > 1) {
i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
maxwrk = max(i__1, i__2);
} else {
i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
maxwrk = max(i__1, i__2);
}
i__1 = maxwrk, i__2 = *m + lwork_dormlq__;
maxwrk = max(i__1, i__2);
} else {
dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb,
dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
maxwrk = *m * 3 + lwork_dgebrd__;
i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
maxwrk = max(maxwrk, bdspac);
i__1 = maxwrk, i__2 = *n * *nrhs;
maxwrk = max(i__1, i__2);
}
}
maxwrk = max(minwrk, maxwrk);
}
work[1] = (doublereal)maxwrk;
if (*lwork < minwrk && !lquery) {
*info = -12;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELSS", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
if (*m == 0 || *n == 0) {
*rank = 0;
return 0;
}
eps = dlamch_((char *)"P", (ftnlen)1);
sfmin = dlamch_((char *)"S", (ftnlen)1);
smlnum = sfmin / eps;
bignum = 1. / smlnum;
dlabad_(&smlnum, &bignum);
anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
iascl = 0;
if (anrm > 0. && anrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 1;
} else if (anrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 2;
} else if (anrm == 0.) {
i__1 = max(*m, *n);
dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb, (ftnlen)1);
dlaset_((char *)"F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn, (ftnlen)1);
*rank = 0;
goto L70;
}
bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
ibscl = 0;
if (bnrm > 0. && bnrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 1;
} else if (bnrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 2;
}
if (*m >= *n) {
mm = *m;
if (*m >= mnthr) {
mm = *n;
itau = 1;
iwork = itau + *n;
i__1 = *lwork - iwork + 1;
dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
if (*n > 1) {
i__1 = *n - 1;
i__2 = *n - 1;
dlaset_((char *)"L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], lda, (ftnlen)1);
}
}
ie = 1;
itauq = ie + *n;
itaup = itauq + *n;
iwork = itaup + *n;
i__1 = *lwork - iwork + 1;
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *lwork - iwork + 1;
dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
(ftnlen)1);
iwork = ie + *n;
dbdsqr_((char *)"U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
&work[1], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = *lwork / *n;
i__1 = *nrhs;
i__2 = chunk;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1], ldb,
&c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
&c__1, (ftnlen)1);
dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
}
} else {
i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2, i__1), i__2 = max(i__2, *nrhs),
i__1 = *n - *m * 3;
if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2, i__1)) {
ldwork = *m;
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__2 = (*m << 2) + *m * *lda + max(i__3, i__4), i__1 = *m * *lda + *m + *m * *nrhs;
if (*lwork >= max(i__2, i__1)) {
ldwork = *lda;
}
itau = 1;
iwork = *m + 1;
i__2 = *lwork - iwork + 1;
dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info);
il = iwork;
dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
i__2 = *m - 1;
i__1 = *m - 1;
dlaset_((char *)"U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], &ldwork, (ftnlen)1);
ie = il + ldwork * *m;
itauq = ie + *m;
itaup = itauq + *m;
iwork = itaup + *m;
i__2 = *lwork - iwork + 1;
dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__2, info);
i__2 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__2, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__2 = *lwork - iwork + 1;
dorgbr_((char *)"P", m, m, m, &work[il], &ldwork, &work[itaup], &work[iwork], &i__2, info,
(ftnlen)1);
iwork = ie + *m;
dbdsqr_((char *)"U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &ldwork, &a[a_offset], lda,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
iwork = ie;
if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[b_offset], ldb, &c_b46,
&work[iwork], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = (*lwork - iwork + 1) / *m;
i__2 = *nrhs;
i__1 = chunk;
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", m, &bl, m, &c_b79, &work[il], &ldwork, &b[i__ * b_dim1 + 1],
ldb, &c_b46, &work[iwork], m, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], &c__1, &c_b46,
&work[iwork], &c__1, (ftnlen)1);
dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
}
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
iwork = itau + *m;
i__1 = *lwork - iwork + 1;
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
} else {
ie = 1;
itauq = ie + *m;
itaup = itauq + *m;
iwork = itaup + *m;
i__1 = *lwork - iwork + 1;
dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *lwork - iwork + 1;
dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
(ftnlen)1);
iwork = ie + *m;
dbdsqr_((char *)"L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__1 = *m;
for (i__ = 1; i__ <= i__1; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
&work[1], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"F", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = *lwork / *n;
i__1 = *nrhs;
i__2 = chunk;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1],
ldb, &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
&c__1, (ftnlen)1);
dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
}
}
}
if (iascl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
} else if (iascl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
}
if (ibscl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
} else if (ibscl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
}
L70:
work[1] = (doublereal)maxwrk;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,744 +0,0 @@
*> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELSS + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
* WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
* DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELSS computes the minimum norm solution to a real linear least
*> squares problem:
*>
*> Minimize 2-norm(| b - A*x |).
*>
*> using the singular value decomposition (SVD) of A. A is an M-by-N
*> matrix which may be rank-deficient.
*>
*> Several right hand side vectors b and solution vectors x can be
*> handled in a single call; they are stored as the columns of the
*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
*> X.
*>
*> The effective rank of A is determined by treating as zero those
*> singular values which are less than RCOND times the largest singular
*> value.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrices B and X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the first min(m,n) rows of A are overwritten with
*> its right singular vectors, stored rowwise.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the M-by-NRHS right hand side matrix B.
*> On exit, B is overwritten by the N-by-NRHS solution
*> matrix X. If m >= n and RANK = n, the residual
*> sum-of-squares for the solution in the i-th column is given
*> by the sum of squares of elements n+1:m in that column.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,max(M,N)).
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (min(M,N))
*> The singular values of A in decreasing order.
*> The condition number of A in the 2-norm = S(1)/S(min(m,n)).
*> \endverbatim
*>
*> \param[in] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> RCOND is used to determine the effective rank of A.
*> Singular values S(i) <= RCOND*S(1) are treated as zero.
*> If RCOND < 0, machine precision is used instead.
*> \endverbatim
*>
*> \param[out] RANK
*> \verbatim
*> RANK is INTEGER
*> The effective rank of A, i.e., the number of singular values
*> which are greater than RCOND*S(1).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= 1, and also:
*> LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: the algorithm for computing the SVD failed to converge;
*> if INFO = i, i off-diagonal elements of an intermediate
*> bidiagonal form did not converge to zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
* =====================================================================
SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
$ WORK, LWORK, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
$ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
$ MAXWRK, MINMN, MINWRK, MM, MNTHR
INTEGER LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
$ LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
$ LWORK_DGELQF
DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
* ..
* .. Local Arrays ..
DOUBLE PRECISION DUM( 1 )
* ..
* .. External Subroutines ..
EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
$ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
$ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL ILAENV, DLAMCH, DLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
MINMN = MIN( M, N )
MAXMN = MAX( M, N )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
INFO = -7
END IF
*
* Compute workspace
* (Note: Comments in the code beginning "Workspace:" describe the
* minimal amount of workspace needed at that point in the code,
* as well as the preferred amount for good performance.
* NB refers to the optimal block size for the immediately
* following subroutine, as returned by ILAENV.)
*
IF( INFO.EQ.0 ) THEN
MINWRK = 1
MAXWRK = 1
IF( MINMN.GT.0 ) THEN
MM = M
MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
IF( M.GE.N .AND. M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than
* columns
*
* Compute space needed for DGEQRF
CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
LWORK_DGEQRF = INT( DUM(1) )
* Compute space needed for DORMQR
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
$ LDB, DUM(1), -1, INFO )
LWORK_DORMQR = INT( DUM(1) )
MM = N
MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
END IF
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined
*
* Compute workspace needed for DBDSQR
*
BDSPAC = MAX( 1, 5*N )
* Compute space needed for DGEBRD
CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
$ B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
* Compute total workspace needed
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, BDSPAC )
MAXWRK = MAX( MAXWRK, N*NRHS )
MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
MAXWRK = MAX( MINWRK, MAXWRK )
END IF
IF( N.GT.M ) THEN
*
* Compute workspace needed for DBDSQR
*
BDSPAC = MAX( 1, 5*M )
MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
IF( N.GE.MNTHR ) THEN
*
* Path 2a - underdetermined, with many more columns
* than rows
*
* Compute space needed for DGELQF
CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
$ -1, INFO )
LWORK_DGELQF = INT( DUM(1) )
* Compute space needed for DGEBRD
CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
$ DUM(1), B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
* Compute space needed for DORMLQ
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
$ B, LDB, DUM(1), -1, INFO )
LWORK_DORMLQ = INT( DUM(1) )
* Compute total workspace needed
MAXWRK = M + LWORK_DGELQF
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
IF( NRHS.GT.1 ) THEN
MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
ELSE
MAXWRK = MAX( MAXWRK, M*M + 2*M )
END IF
MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
ELSE
*
* Path 2 - underdetermined
*
* Compute space needed for DGEBRD
CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
$ DUM(1), B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
MAXWRK = 3*M + LWORK_DGEBRD
MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, BDSPAC )
MAXWRK = MAX( MAXWRK, N*NRHS )
END IF
END IF
MAXWRK = MAX( MINWRK, MAXWRK )
END IF
WORK( 1 ) = MAXWRK
*
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
$ INFO = -12
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELSS', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RANK = 0
RETURN
END IF
*
* Get machine parameters
*
EPS = DLAMCH( 'P' )
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
IASCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM
*
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
IASCL = 1
ELSE IF( ANRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM
*
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
IASCL = 2
ELSE IF( ANRM.EQ.ZERO ) THEN
*
* Matrix all zero. Return zero solution.
*
CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
RANK = 0
GO TO 70
END IF
*
* Scale B if max element outside range [SMLNUM,BIGNUM]
*
BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
IBSCL = 0
IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM
*
CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
IBSCL = 1
ELSE IF( BNRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM
*
CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
IBSCL = 2
END IF
*
* Overdetermined case
*
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined
*
MM = M
IF( M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns
*
MM = N
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Multiply B by transpose(Q)
* (Workspace: need N+NRHS, prefer N+NRHS*NB)
*
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Zero out below R
*
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
END IF
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in A
* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
*
CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of R
* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors of R in A
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + N
*
* Perform bidiagonal QR iteration
* multiply B by transpose of left singular vectors
* compute right singular vectors in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
$ 1, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 10 I = 1, N
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
10 CONTINUE
*
* Multiply B by right singular vectors
* (Workspace: need N, prefer N*NRHS)
*
IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
$ WORK, LDB )
CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = LWORK / N
DO 20 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF
*
ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
$ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
*
* Path 2a - underdetermined, with many more columns than rows
* and sufficient workspace for an efficient algorithm
*
LDWORK = M
IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
$ M*LDA+M+M*NRHS ) )LDWORK = LDA
ITAU = 1
IWORK = M + 1
*
* Compute A=L*Q
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
IL = IWORK
*
* Copy L to WORK(IL), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
$ LDWORK )
IE = IL + LDWORK*M
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IL)
* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of L
* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUQ ), B, LDB, WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors of R in WORK(IL)
* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + M
*
* Perform bidiagonal QR iteration,
* computing right singular vectors of L in WORK(IL) and
* multiplying B by transpose of left singular vectors
* (Workspace: need M*M+M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
$ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 30 I = 1, M
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
30 CONTINUE
IWORK = IE
*
* Multiply B by right singular vectors of L in WORK(IL)
* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
*
IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
$ B, LDB, ZERO, WORK( IWORK ), LDB )
CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = ( LWORK-IWORK+1 ) / M
DO 40 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
$ B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, ZERO, WORK( IWORK ), 1 )
CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
END IF
*
* Zero out below first M rows of B
*
CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
IWORK = ITAU + M
*
* Multiply transpose(Q) by B
* (Workspace: need M+NRHS, prefer M+NRHS*NB)
*
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
ELSE
*
* Path 2 - remaining underdetermined cases
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize A
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors
* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors in A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + M
*
* Perform bidiagonal QR iteration,
* computing right singular vectors of A in A and
* multiplying B by transpose of left singular vectors
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
$ 1, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 50 I = 1, M
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
50 CONTINUE
*
* Multiply B by right singular vectors of A
* (Workspace: need N, prefer N*NRHS)
*
IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
$ WORK, LDB )
CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = LWORK / N
DO 60 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF
END IF
*
* Undo scaling
*
IF( IASCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
ELSE IF( IASCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
END IF
IF( IBSCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
ELSE IF( IBSCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
END IF
*
70 CONTINUE
WORK( 1 ) = MAXWRK
RETURN
*
* End of DGELSS
*
END

173
lib/linalg/dgemm.cpp Normal file
View File

@ -0,0 +1,173 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgemm_(char *transa, char *transb, integer *m, integer *n, integer *k, doublereal *alpha,
doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta,
doublereal *c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3;
integer i__, j, l, info;
logical nota, notb;
doublereal temp;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
integer nrowa, nrowb;
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
c_dim1 = *ldc;
c_offset = 1 + c_dim1;
c__ -= c_offset;
nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1);
notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1);
if (nota) {
nrowa = *m;
} else {
nrowa = *k;
}
if (notb) {
nrowb = *k;
} else {
nrowb = *n;
}
info = 0;
if (!nota && !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1) &&
!lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1)) {
info = 1;
} else if (!notb && !lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1) &&
!lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen)1)) {
info = 2;
} else if (*m < 0) {
info = 3;
} else if (*n < 0) {
info = 4;
} else if (*k < 0) {
info = 5;
} else if (*lda < max(1, nrowa)) {
info = 8;
} else if (*ldb < max(1, nrowb)) {
info = 10;
} else if (*ldc < max(1, *m)) {
info = 13;
}
if (info != 0) {
xerbla_((char *)"DGEMM ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
return 0;
}
if (*alpha == 0.) {
if (*beta == 0.) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
}
return 0;
}
if (notb) {
if (nota) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (*beta == 0.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
} else if (*beta != 1.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
i__2 = *k;
for (l = 1; l <= i__2; ++l) {
temp = *alpha * b[l + j * b_dim1];
i__3 = *m;
for (i__ = 1; i__ <= i__3; ++i__) {
c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
}
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp = 0.;
i__3 = *k;
for (l = 1; l <= i__3; ++l) {
temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
}
if (*beta == 0.) {
c__[i__ + j * c_dim1] = *alpha * temp;
} else {
c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
}
}
}
}
} else {
if (nota) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (*beta == 0.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
} else if (*beta != 1.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
i__2 = *k;
for (l = 1; l <= i__2; ++l) {
temp = *alpha * b[j + l * b_dim1];
i__3 = *m;
for (i__ = 1; i__ <= i__3; ++i__) {
c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
}
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp = 0.;
i__3 = *k;
for (l = 1; l <= i__3; ++l) {
temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
}
if (*beta == 0.) {
c__[i__ + j * c_dim1] = *alpha * temp;
} else {
c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
}
}
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,379 +0,0 @@
*> \brief \b DGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**T.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**T.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is DOUBLE PRECISION.
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is DOUBLE PRECISION array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,J,L,NROWA,NROWB
LOGICAL NOTA,NOTB
* ..
* .. Parameters ..
DOUBLE PRECISION ONE,ZERO
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* transposed and set NROWA and NROWB as the number of rows of A
* and B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
IF (NOTA) THEN
NROWA = M
ELSE
NROWA = K
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And if alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
END IF
ELSE
IF (NOTA) THEN
*
* Form C := alpha*A*B**T + beta*C
*
DO 170 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 130 I = 1,M
C(I,J) = ZERO
130 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 140 I = 1,M
C(I,J) = BETA*C(I,J)
140 CONTINUE
END IF
DO 160 L = 1,K
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
160 CONTINUE
170 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 200 J = 1,N
DO 190 I = 1,M
TEMP = ZERO
DO 180 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
180 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
190 CONTINUE
200 CONTINUE
END IF
END IF
*
RETURN
*
* End of DGEMM
*
END

149
lib/linalg/dgemv.cpp Normal file
View File

@ -0,0 +1,149 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgemv_(char *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda,
doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy,
ftnlen trans_len)
{
integer a_dim1, a_offset, i__1, i__2;
integer i__, j, ix, iy, jx, jy, kx, ky, info;
doublereal temp;
integer lenx, leny;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--x;
--y;
info = 0;
if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
!lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
info = 1;
} else if (*m < 0) {
info = 2;
} else if (*n < 0) {
info = 3;
} else if (*lda < max(1, *m)) {
info = 6;
} else if (*incx == 0) {
info = 8;
} else if (*incy == 0) {
info = 11;
}
if (info != 0) {
xerbla_((char *)"DGEMV ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
return 0;
}
if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
lenx = *n;
leny = *m;
} else {
lenx = *m;
leny = *n;
}
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (lenx - 1) * *incx;
}
if (*incy > 0) {
ky = 1;
} else {
ky = 1 - (leny - 1) * *incy;
}
if (*beta != 1.) {
if (*incy == 1) {
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = 0.;
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = *beta * y[i__];
}
}
} else {
iy = ky;
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = 0.;
iy += *incy;
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = *beta * y[iy];
iy += *incy;
}
}
}
}
if (*alpha == 0.) {
return 0;
}
if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
jx = kx;
if (*incy == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = *alpha * x[jx];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[i__] += temp * a[i__ + j * a_dim1];
}
jx += *incx;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = *alpha * x[jx];
iy = ky;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[iy] += temp * a[i__ + j * a_dim1];
iy += *incy;
}
jx += *incx;
}
}
} else {
jy = ky;
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[i__];
}
y[jy] += *alpha * temp;
jy += *incy;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[ix];
ix += *incx;
}
y[jy] += *alpha * temp;
jy += *incy;
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,327 +0,0 @@
*> \brief \b DGEMV
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA,BETA
* INTEGER INCX,INCY,LDA,M,N
* CHARACTER TRANS
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEMV performs one of the matrix-vector operations
*>
*> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
*>
*> where alpha and beta are scalars, x and y are vectors and A is an
*> m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> On entry, TRANS specifies the operation to be performed as
*> follows:
*>
*> TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
*>
*> TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
*>
*> TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix A.
*> M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix A.
*> N must be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, N )
*> Before entry, the leading m by n part of the array A must
*> contain the matrix of coefficients.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. LDA must be at least
*> max( 1, m ).
*> \endverbatim
*>
*> \param[in] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
*> and at least
*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
*> Before entry, the incremented array X must contain the
*> vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> On entry, INCX specifies the increment for the elements of
*> X. INCX must not be zero.
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is DOUBLE PRECISION.
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then Y need not be set on input.
*> \endverbatim
*>
*> \param[in,out] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
*> and at least
*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
*> Before entry with BETA non-zero, the incremented array Y
*> must contain the vector y. On exit, Y is overwritten by the
*> updated vector y.
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> On entry, INCY specifies the increment for the elements of
*> Y. INCY must not be zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level2
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 2 Blas routine.
*> The vector and matrix arguments are not referenced when N = 0, or M = 0
*>
*> -- Written on 22-October-1986.
*> Jack Dongarra, Argonne National Lab.
*> Jeremy Du Croz, Nag Central Office.
*> Sven Hammarling, Nag Central Office.
*> Richard Hanson, Sandia National Labs.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
INTEGER INCX,INCY,LDA,M,N
CHARACTER TRANS
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE,ZERO
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
*
* Test the input parameters.
*
INFO = 0
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
+ .NOT.LSAME(TRANS,'C')) THEN
INFO = 1
ELSE IF (M.LT.0) THEN
INFO = 2
ELSE IF (N.LT.0) THEN
INFO = 3
ELSE IF (LDA.LT.MAX(1,M)) THEN
INFO = 6
ELSE IF (INCX.EQ.0) THEN
INFO = 8
ELSE IF (INCY.EQ.0) THEN
INFO = 11
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGEMV ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
*
* Set LENX and LENY, the lengths of the vectors x and y, and set
* up the start points in X and Y.
*
IF (LSAME(TRANS,'N')) THEN
LENX = N
LENY = M
ELSE
LENX = M
LENY = N
END IF
IF (INCX.GT.0) THEN
KX = 1
ELSE
KX = 1 - (LENX-1)*INCX
END IF
IF (INCY.GT.0) THEN
KY = 1
ELSE
KY = 1 - (LENY-1)*INCY
END IF
*
* Start the operations. In this version the elements of A are
* accessed sequentially with one pass through A.
*
* First form y := beta*y.
*
IF (BETA.NE.ONE) THEN
IF (INCY.EQ.1) THEN
IF (BETA.EQ.ZERO) THEN
DO 10 I = 1,LENY
Y(I) = ZERO
10 CONTINUE
ELSE
DO 20 I = 1,LENY
Y(I) = BETA*Y(I)
20 CONTINUE
END IF
ELSE
IY = KY
IF (BETA.EQ.ZERO) THEN
DO 30 I = 1,LENY
Y(IY) = ZERO
IY = IY + INCY
30 CONTINUE
ELSE
DO 40 I = 1,LENY
Y(IY) = BETA*Y(IY)
IY = IY + INCY
40 CONTINUE
END IF
END IF
END IF
IF (ALPHA.EQ.ZERO) RETURN
IF (LSAME(TRANS,'N')) THEN
*
* Form y := alpha*A*x + y.
*
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
JX = JX + INCX
80 CONTINUE
END IF
ELSE
*
* Form y := alpha*A**T*x + y.
*
JY = KY
IF (INCX.EQ.1) THEN
DO 100 J = 1,N
TEMP = ZERO
DO 90 I = 1,M
TEMP = TEMP + A(I,J)*X(I)
90 CONTINUE
Y(JY) = Y(JY) + ALPHA*TEMP
JY = JY + INCY
100 CONTINUE
ELSE
DO 120 J = 1,N
TEMP = ZERO
IX = KX
DO 110 I = 1,M
TEMP = TEMP + A(I,J)*X(IX)
IX = IX + INCX
110 CONTINUE
Y(JY) = Y(JY) + ALPHA*TEMP
JY = JY + INCY
120 CONTINUE
END IF
END IF
*
RETURN
*
* End of DGEMV
*
END

54
lib/linalg/dgeqr2.cpp Normal file
View File

@ -0,0 +1,54 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgeqr2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, k;
doublereal aii;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGEQR2", &i__1, (ftnlen)6);
return 0;
}
k = min(*m, *n);
i__1 = k;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1, &tau[i__]);
if (i__ < *n) {
aii = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[i__],
&a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
a[i__ + i__ * a_dim1] = aii;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,198 +0,0 @@
*> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEQR2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEQR2 computes a QR factorization of a real m-by-n matrix A:
*>
*> A = Q * ( R ),
*> ( 0 )
*>
*> where:
*>
*> Q is a m-by-m orthogonal matrix;
*> R is an upper-triangular n-by-n matrix;
*> 0 is a (m-n)-by-n zero matrix, if m > n.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix A.
*> On exit, the elements on and above the diagonal of the array
*> contain the min(m,n) by n upper trapezoidal matrix R (R is
*> upper triangular if m >= n); the elements below the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(1) H(2) . . . H(k), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQR2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAU( I ) )
IF( I.LT.N ) THEN
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGEQR2
*
END

113
lib/linalg/dgeqrf.cpp Normal file
View File

@ -0,0 +1,113 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
int dgeqrf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
extern int dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *),
dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen, ftnlen, ftnlen, ftnlen),
dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
k = min(*m, *n);
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else if (!lquery) {
if (*lwork <= 0 || *m > 0 && *lwork < max(1, *n)) {
*info = -7;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGEQRF", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
if (k == 0) {
lwkopt = 1;
} else {
lwkopt = *n * nb;
}
work[1] = (doublereal)lwkopt;
return 0;
}
if (k == 0) {
work[1] = 1.;
return 0;
}
nbmin = 2;
nx = 0;
iws = *n;
if (nb > 1 && nb < k) {
i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < k) {
ldwork = *n;
iws = ldwork * nb;
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
}
}
if (nb >= nbmin && nb < k && nx < k) {
i__1 = k - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = k - i__ + 1;
ib = min(i__3, nb);
i__3 = *m - i__ + 1;
dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
if (i__ + ib <= *n) {
i__3 = *m - i__ + 1;
dlarft_((char *)"Forward", (char *)"Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&work[1], &ldwork, (ftnlen)7, (ftnlen)10);
i__3 = *m - i__ + 1;
i__4 = *n - i__ - ib + 1;
dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib,
&a[i__ + i__ * a_dim1], lda, &work[1], &ldwork,
&a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork, (ftnlen)4,
(ftnlen)9, (ftnlen)7, (ftnlen)10);
}
}
} else {
i__ = 1;
}
if (i__ <= k) {
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,282 +0,0 @@
*> \brief \b DGEQRF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEQRF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEQRF computes a QR factorization of a real M-by-N matrix A:
*>
*> A = Q * ( R ),
*> ( 0 )
*>
*> where:
*>
*> Q is a M-by-M orthogonal matrix;
*> R is an upper-triangular N-by-N matrix;
*> 0 is a (M-N)-by-N zero matrix, if M > N.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the elements on and above the diagonal of the array
*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is
*> upper triangular if m >= n); the elements below the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of min(m,n) elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK.
*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise.
*> For optimum performance LWORK >= N*NB, where NB is
*> the optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(1) H(2) . . . H(k), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
K = MIN( M, N )
INFO = 0
NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( .NOT.LQUERY ) THEN
IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
$ INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQRF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
IF( K.EQ.0 ) THEN
LWKOPT = 1
ELSE
LWKOPT = N*NB
END IF
WORK( 1 ) = LWKOPT
RETURN
END IF
*
* Quick return if possible
*
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = N
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = N
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the QR factorization of the current block
* A(i:m,i:i+ib-1)
*
CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.N ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
$ A( I, I ), LDA, TAU( I ), WORK, LDWORK )
*
* Apply H**T to A(i:m,i+ib:n) from the left
*
CALL DLARFB( 'Left', 'Transpose', 'Forward',
$ 'Columnwise', M-I+1, N-I-IB+1, IB,
$ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
$ LDA, WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGEQRF
*
END

77
lib/linalg/dger.cpp Normal file
View File

@ -0,0 +1,77 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y,
integer *incy, doublereal *a, integer *lda)
{
integer a_dim1, a_offset, i__1, i__2;
integer i__, j, ix, jy, kx, info;
doublereal temp;
extern int xerbla_(char *, integer *, ftnlen);
--x;
--y;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
info = 0;
if (*m < 0) {
info = 1;
} else if (*n < 0) {
info = 2;
} else if (*incx == 0) {
info = 5;
} else if (*incy == 0) {
info = 7;
} else if (*lda < max(1, *m)) {
info = 9;
}
if (info != 0) {
xerbla_((char *)"DGER ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || *alpha == 0.) {
return 0;
}
if (*incy > 0) {
jy = 1;
} else {
jy = 1 - (*n - 1) * *incy;
}
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[i__] * temp;
}
}
jy += *incy;
}
} else {
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (*m - 1) * *incx;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[ix] * temp;
ix += *incx;
}
}
jy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,224 +0,0 @@
*> \brief \b DGER
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA
* INTEGER INCX,INCY,LDA,M,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGER performs the rank 1 operation
*>
*> A := alpha*x*y**T + A,
*>
*> where alpha is a scalar, x is an m element vector, y is an n element
*> vector and A is an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix A.
*> M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix A.
*> N must be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( m - 1 )*abs( INCX ) ).
*> Before entry, the incremented array X must contain the m
*> element vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> On entry, INCX specifies the increment for the elements of
*> X. INCX must not be zero.
*> \endverbatim
*>
*> \param[in] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( n - 1 )*abs( INCY ) ).
*> Before entry, the incremented array Y must contain the n
*> element vector y.
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> On entry, INCY specifies the increment for the elements of
*> Y. INCY must not be zero.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, N )
*> Before entry, the leading m by n part of the array A must
*> contain the matrix of coefficients. On exit, A is
*> overwritten by the updated matrix.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. LDA must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level2
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 2 Blas routine.
*>
*> -- Written on 22-October-1986.
*> Jack Dongarra, Argonne National Lab.
*> Jeremy Du Croz, Nag Central Office.
*> Sven Hammarling, Nag Central Office.
*> Richard Hanson, Sandia National Labs.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
*
* -- Reference BLAS level2 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA
INTEGER INCX,INCY,LDA,M,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER (ZERO=0.0D+0)
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,IX,J,JY,KX
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
*
* Test the input parameters.
*
INFO = 0
IF (M.LT.0) THEN
INFO = 1
ELSE IF (N.LT.0) THEN
INFO = 2
ELSE IF (INCX.EQ.0) THEN
INFO = 5
ELSE IF (INCY.EQ.0) THEN
INFO = 7
ELSE IF (LDA.LT.MAX(1,M)) THEN
INFO = 9
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGER ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
*
* Start the operations. In this version the elements of A are
* accessed sequentially with one pass through A.
*
IF (INCY.GT.0) THEN
JY = 1
ELSE
JY = 1 - (N-1)*INCY
END IF
IF (INCX.EQ.1) THEN
DO 20 J = 1,N
IF (Y(JY).NE.ZERO) THEN
TEMP = ALPHA*Y(JY)
DO 10 I = 1,M
A(I,J) = A(I,J) + X(I)*TEMP
10 CONTINUE
END IF
JY = JY + INCY
20 CONTINUE
ELSE
IF (INCX.GT.0) THEN
KX = 1
ELSE
KX = 1 - (M-1)*INCX
END IF
DO 40 J = 1,N
IF (Y(JY).NE.ZERO) THEN
TEMP = ALPHA*Y(JY)
IX = KX
DO 30 I = 1,M
A(I,J) = A(I,J) + X(IX)*TEMP
IX = IX + INCX
30 CONTINUE
END IF
JY = JY + INCY
40 CONTINUE
END IF
*
RETURN
*
* End of DGER
*
END

44
lib/linalg/dgesv.cpp Normal file
View File

@ -0,0 +1,44 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b,
integer *ldb, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
extern int dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen),
dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *,
integer *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
*info = 0;
if (*n < 0) {
*info = -1;
} else if (*nrhs < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
} else if (*ldb < max(1, *n)) {
*info = -7;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGESV ", &i__1, (ftnlen)6);
return 0;
}
dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
if (*info == 0) {
dgetrs_((char *)"No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info,
(ftnlen)12);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,176 +0,0 @@
*> \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGESV + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGESV computes the solution to a real system of linear equations
*> A * X = B,
*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
*>
*> The LU decomposition with partial pivoting and row interchanges is
*> used to factor A as
*> A = P * L * U,
*> where P is a permutation matrix, L is unit lower triangular, and U is
*> upper triangular. The factored form of A is then used to solve the
*> system of equations A * X = B.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of linear equations, i.e., the order of the
*> matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrix B. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the N-by-N coefficient matrix A.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices that define the permutation matrix P;
*> row i of the matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the N-by-NRHS matrix of right hand side matrix B.
*> On exit, if INFO = 0, the N-by-NRHS solution matrix X.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,N).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, so the solution could not be computed.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
* =====================================================================
SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. External Subroutines ..
EXTERNAL DGETRF, DGETRS, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( NRHS.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGESV ', -INFO )
RETURN
END IF
*
* Compute the LU factorization of A.
*
CALL DGETRF( N, N, A, LDA, IPIV, INFO )
IF( INFO.EQ.0 ) THEN
*
* Solve the system A*X = B, overwriting B with X.
*
CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
$ INFO )
END IF
RETURN
*
* End of DGESV
*
END

1906
lib/linalg/dgesvd.cpp Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

76
lib/linalg/dgetf2.cpp Normal file
View File

@ -0,0 +1,76 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b8 = -1.;
int dgetf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
doublereal d__1;
integer i__, j, jp;
extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, integer *),
dscal_(integer *, doublereal *, doublereal *, integer *);
doublereal sfmin;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETF2", &i__1, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
sfmin = dlamch_((char *)"S", (ftnlen)1);
i__1 = min(*m, *n);
for (j = 1; j <= i__1; ++j) {
i__2 = *m - j + 1;
jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
ipiv[j] = jp;
if (a[jp + j * a_dim1] != 0.) {
if (jp != j) {
dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
}
if (j < *m) {
if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) {
i__2 = *m - j;
d__1 = 1. / a[j + j * a_dim1];
dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
} else {
i__2 = *m - j;
for (i__ = 1; i__ <= i__2; ++i__) {
a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
}
}
}
} else if (*info == 0) {
*info = j;
}
if (j < min(*m, *n)) {
i__2 = *m - j;
i__3 = *n - j;
dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (j + 1) * a_dim1], lda,
&a[j + 1 + (j + 1) * a_dim1], lda);
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,210 +0,0 @@
*> \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm).
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETF2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETF2 computes an LU factorization of a general m-by-n matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the right-looking Level 2 BLAS version of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION SFMIN
INTEGER I, J, JP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
INTEGER IDAMAX
EXTERNAL DLAMCH, IDAMAX
* ..
* .. External Subroutines ..
EXTERNAL DGER, DSCAL, DSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETF2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Compute machine safe minimum
*
SFMIN = DLAMCH('S')
*
DO 10 J = 1, MIN( M, N )
*
* Find pivot and test for singularity.
*
JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
IPIV( J ) = JP
IF( A( JP, J ).NE.ZERO ) THEN
*
* Apply the interchange to columns 1:N.
*
IF( JP.NE.J )
$ CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
*
* Compute elements J+1:M of J-th column.
*
IF( J.LT.M ) THEN
IF( ABS(A( J, J )) .GE. SFMIN ) THEN
CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
ELSE
DO 20 I = 1, M-J
A( J+I, J ) = A( J+I, J ) / A( J, J )
20 CONTINUE
END IF
END IF
*
ELSE IF( INFO.EQ.0 ) THEN
*
INFO = J
END IF
*
IF( J.LT.MIN( M, N ) ) THEN
*
* Update trailing submatrix.
*
CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
$ A( J+1, J+1 ), LDA )
END IF
10 CONTINUE
RETURN
*
* End of DGETF2
*
END

90
lib/linalg/dgetrf.cpp Normal file
View File

@ -0,0 +1,90 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static doublereal c_b16 = 1.;
static doublereal c_b19 = -1.;
int dgetrf_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
integer i__, j, jb, nb;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iinfo;
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
extern int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *,
integer *),
dgetrf2_(integer *, integer *, doublereal *, integer *, integer *, integer *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRF", &i__1, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
nb = ilaenv_(&c__1, (char *)"DGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
if (nb <= 1 || nb >= min(*m, *n)) {
dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
} else {
i__1 = min(*m, *n);
i__2 = nb;
for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
i__3 = min(*m, *n) - j + 1;
jb = min(i__3, nb);
i__3 = *m - j + 1;
dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo + j - 1;
}
i__4 = *m, i__5 = j + jb - 1;
i__3 = min(i__4, i__5);
for (i__ = j; i__ <= i__3; ++i__) {
ipiv[i__] = j - 1 + ipiv[i__];
}
i__3 = j - 1;
i__4 = j + jb - 1;
dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
if (j + jb <= *n) {
i__3 = *n - j - jb + 1;
i__4 = j + jb - 1;
dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1);
i__3 = *n - j - jb + 1;
dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b16,
&a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4,
(ftnlen)5, (ftnlen)12, (ftnlen)4);
if (j + jb <= *m) {
i__3 = *m - j - jb + 1;
i__4 = *n - j - jb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &c_b19,
&a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16,
&a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12);
}
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,222 +0,0 @@
*> \brief \b DGETRF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRF computes an LU factorization of a general M-by-N matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the right-looking Level 3 BLAS version of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, IINFO, J, JB, NB
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRF', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Determine the block size for this environment.
*
NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
*
* Use unblocked code.
*
CALL DGETRF2( M, N, A, LDA, IPIV, INFO )
ELSE
*
* Use blocked code.
*
DO 20 J = 1, MIN( M, N ), NB
JB = MIN( MIN( M, N )-J+1, NB )
*
* Factor diagonal and subdiagonal blocks and test for exact
* singularity.
*
CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
*
* Adjust INFO and the pivot indices.
*
IF( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO + J - 1
DO 10 I = J, MIN( M, J+JB-1 )
IPIV( I ) = J - 1 + IPIV( I )
10 CONTINUE
*
* Apply interchanges to columns 1:J-1.
*
CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
*
IF( J+JB.LE.N ) THEN
*
* Apply interchanges to columns J+JB:N.
*
CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
$ IPIV, 1 )
*
* Compute block row of U.
*
CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
$ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
$ LDA )
IF( J+JB.LE.M ) THEN
*
* Update trailing submatrix.
*
CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
$ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
$ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
$ LDA )
END IF
END IF
20 CONTINUE
END IF
RETURN
*
* End of DGETRF
*
END

106
lib/linalg/dgetrf2.cpp Normal file
View File

@ -0,0 +1,106 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b13 = 1.;
static doublereal c_b16 = -1.;
int dgetrf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2;
doublereal d__1;
integer i__, n1, n2;
doublereal temp;
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen,
ftnlen);
integer iinfo;
doublereal sfmin;
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen),
dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRF2", &i__1, (ftnlen)7);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
if (*m == 1) {
ipiv[1] = 1;
if (a[a_dim1 + 1] == 0.) {
*info = 1;
}
} else if (*n == 1) {
sfmin = dlamch_((char *)"S", (ftnlen)1);
i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
ipiv[1] = i__;
if (a[i__ + a_dim1] != 0.) {
if (i__ != 1) {
temp = a[a_dim1 + 1];
a[a_dim1 + 1] = a[i__ + a_dim1];
a[i__ + a_dim1] = temp;
}
if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
i__1 = *m - 1;
d__1 = 1. / a[a_dim1 + 1];
dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
} else {
i__1 = *m - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
}
}
} else {
*info = 1;
}
} else {
n1 = min(*m, *n) / 2;
n2 = *n - n1;
dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo;
}
dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1);
dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1],
lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *m - n1;
dgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda,
&a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * a_dim1], lda,
(ftnlen)1, (ftnlen)1);
i__1 = *m - n1;
dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo + n1;
}
i__1 = min(*m, *n);
for (i__ = n1 + 1; i__ <= i__1; ++i__) {
ipiv[i__] += n1;
}
i__1 = n1 + 1;
i__2 = min(*m, *n);
dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,269 +0,0 @@
*> \brief \b DGETRF2
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRF2 computes an LU factorization of a general M-by-N matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the recursive version of the algorithm. It divides
*> the matrix into four submatrices:
*>
*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2
*> A = [ -----|----- ] with n1 = min(m,n)/2
*> [ A21 | A22 ] n2 = n-n1
*>
*> [ A11 ]
*> The subroutine calls itself to factor [ --- ],
*> [ A12 ]
*> [ A12 ]
*> do the swaps on [ --- ], solve A12, update A22,
*> [ A22 ]
*>
*> then calls itself to factor A22 and do the swaps on A21.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION SFMIN, TEMP
INTEGER I, IINFO, N1, N2
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
INTEGER IDAMAX
EXTERNAL DLAMCH, IDAMAX
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DSCAL, DLASWP, DTRSM, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRF2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
IF ( M.EQ.1 ) THEN
*
* Use unblocked code for one row case
* Just need to handle IPIV and INFO
*
IPIV( 1 ) = 1
IF ( A(1,1).EQ.ZERO )
$ INFO = 1
*
ELSE IF( N.EQ.1 ) THEN
*
* Use unblocked code for one column case
*
*
* Compute machine safe minimum
*
SFMIN = DLAMCH('S')
*
* Find pivot and test for singularity
*
I = IDAMAX( M, A( 1, 1 ), 1 )
IPIV( 1 ) = I
IF( A( I, 1 ).NE.ZERO ) THEN
*
* Apply the interchange
*
IF( I.NE.1 ) THEN
TEMP = A( 1, 1 )
A( 1, 1 ) = A( I, 1 )
A( I, 1 ) = TEMP
END IF
*
* Compute elements 2:M of the column
*
IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN
CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
ELSE
DO 10 I = 1, M-1
A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 )
10 CONTINUE
END IF
*
ELSE
INFO = 1
END IF
*
ELSE
*
* Use recursive code
*
N1 = MIN( M, N ) / 2
N2 = N-N1
*
* [ A11 ]
* Factor [ --- ]
* [ A21 ]
*
CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO )
IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO
*
* [ A12 ]
* Apply interchanges to [ --- ]
* [ A22 ]
*
CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 )
*
* Solve A12
*
CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
$ A( 1, N1+1 ), LDA )
*
* Update A22
*
CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
$ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
*
* Factor A22
*
CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ),
$ IINFO )
*
* Adjust INFO and the pivot indices
*
IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO + N1
DO 20 I = N1+1, MIN( M, N )
IPIV( I ) = IPIV( I ) + N1
20 CONTINUE
*
* Apply interchanges to A21
*
CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 )
*
END IF
RETURN
*
* End of DGETRF2
*
END

125
lib/linalg/dgetri.cpp Normal file
View File

@ -0,0 +1,125 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__2 = 2;
static doublereal c_b20 = -1.;
static doublereal c_b22 = 1.;
int dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, j, jb, nb, jj, jp, nn, iws;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen),
dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, ftnlen);
integer nbmin;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *),
dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork;
extern int dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen,
ftnlen);
integer lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
--work;
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lwkopt = *n * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*n < 0) {
*info = -1;
} else if (*lda < max(1, *n)) {
*info = -3;
} else if (*lwork < max(1, *n) && !lquery) {
*info = -6;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRI", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
if (*n == 0) {
return 0;
}
dtrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8);
if (*info > 0) {
return 0;
}
nbmin = 2;
ldwork = *n;
if (nb > 1 && nb < *n) {
i__1 = ldwork * nb;
iws = max(i__1, 1);
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
} else {
iws = *n;
}
if (nb < nbmin || nb >= *n) {
for (j = *n; j >= 1; --j) {
i__1 = *n;
for (i__ = j + 1; i__ <= i__1; ++i__) {
work[i__] = a[i__ + j * a_dim1];
a[i__ + j * a_dim1] = 0.;
}
if (j < *n) {
i__1 = *n - j;
dgemv_((char *)"No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda,
&work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1, (ftnlen)12);
}
}
} else {
nn = (*n - 1) / nb * nb + 1;
i__1 = -nb;
for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
i__2 = nb, i__3 = *n - j + 1;
jb = min(i__2, i__3);
i__2 = j + jb - 1;
for (jj = j; jj <= i__2; ++jj) {
i__3 = *n;
for (i__ = jj + 1; i__ <= i__3; ++i__) {
work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
a[i__ + jj * a_dim1] = 0.;
}
}
if (j + jb <= *n) {
i__2 = *n - j - jb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &c_b20,
&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b22,
&a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12);
}
dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b22, &work[j], &ldwork,
&a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4);
}
}
for (j = *n - 1; j >= 1; --j) {
jp = ipiv[j];
if (jp != j) {
dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
}
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,258 +0,0 @@
*> \brief \b DGETRI
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRI + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRI computes the inverse of a matrix using the LU factorization
*> computed by DGETRF.
*>
*> This method inverts U and then computes inv(A) by solving the system
*> inv(A)*L = inv(U) for inv(A).
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the factors L and U from the factorization
*> A = P*L*U as computed by DGETRF.
*> On exit, if INFO = 0, the inverse of the original matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices from DGETRF; for 1<=i<=N, row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= max(1,N).
*> For optimal performance LWORK >= N*NB, where NB is
*> the optimal blocksize returned by ILAENV.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
*> singular and its inverse could not be computed.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
LWKOPT = N*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from DTRTRI, then U is singular,
* and the inverse is not computed.
*
CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL DGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of DGETRI
*
END

65
lib/linalg/dgetrs.cpp Normal file
View File

@ -0,0 +1,65 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b12 = 1.;
static integer c_n1 = -1;
int dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv,
doublereal *b, integer *ldb, integer *info, ftnlen trans_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen),
xerbla_(char *, integer *, ftnlen),
dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
logical notran;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
*info = 0;
notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1);
if (!notran && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
!lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *n)) {
*info = -5;
} else if (*ldb < max(1, *n)) {
*info = -8;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRS", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0 || *nrhs == 0) {
return 0;
}
if (notran) {
dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)4);
dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)8);
} else {
dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)8);
dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)4);
dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,222 +0,0 @@
*> \brief \b DGETRS
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRS + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* .. Scalar Arguments ..
* CHARACTER TRANS
* INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRS solves a system of linear equations
*> A * X = B or A**T * X = B
*> with a general N-by-N matrix A using the LU factorization computed
*> by DGETRF.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the form of the system of equations:
*> = 'N': A * X = B (No transpose)
*> = 'T': A**T* X = B (Transpose)
*> = 'C': A**T* X = B (Conjugate transpose = Transpose)
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrix B. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The factors L and U from the factorization A = P*L*U
*> as computed by DGETRF.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices from DGETRF; for 1<=i<=N, row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the right hand side matrix B.
*> On exit, the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,N).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER TRANS
INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL NOTRAN
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL DLASWP, DTRSM, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NOTRAN = LSAME( TRANS, 'N' )
IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
INFO = -8
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRS', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 .OR. NRHS.EQ.0 )
$ RETURN
*
IF( NOTRAN ) THEN
*
* Solve A * X = B.
*
* Apply row interchanges to the right hand sides.
*
CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
*
* Solve L*X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
$ ONE, A, LDA, B, LDB )
*
* Solve U*X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
$ NRHS, ONE, A, LDA, B, LDB )
ELSE
*
* Solve A**T * X = B.
*
* Solve U**T *X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
$ ONE, A, LDA, B, LDB )
*
* Solve L**T *X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
$ A, LDA, B, LDB )
*
* Apply row interchanges to the solution vectors.
*
CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
END IF
*
RETURN
*
* End of DGETRS
*
END

14
lib/linalg/disnan.cpp Normal file
View File

@ -0,0 +1,14 @@
#include <cmath>
extern "C" {
#include "lmp_f2c.h"
logical disnan_(const doublereal *din)
{
if (!din) return TRUE_;
return std::isnan(*din) ? TRUE_ : FALSE_;
}
}

View File

@ -1,77 +0,0 @@
*> \brief \b DISNAN tests input for NaN.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DISNAN + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* LOGICAL FUNCTION DISNAN( DIN )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION, INTENT(IN) :: DIN
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DISNAN returns .TRUE. if its argument is NaN, and .FALSE.
*> otherwise. To be replaced by the Fortran 2003 intrinsic in the
*> future.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] DIN
*> \verbatim
*> DIN is DOUBLE PRECISION
*> Input to test for NaN.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
LOGICAL FUNCTION DISNAN( DIN )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION, INTENT(IN) :: DIN
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL DLAISNAN
EXTERNAL DLAISNAN
* ..
* .. Executable Statements ..
DISNAN = DLAISNAN(DIN,DIN)
RETURN
END

16
lib/linalg/dlabad.cpp Normal file
View File

@ -0,0 +1,16 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlabad_(doublereal *small, doublereal *large)
{
double d_lmp_lg10(doublereal *), sqrt(doublereal);
if (d_lmp_lg10(large) > 2e3) {
*small = sqrt(*small);
*large = sqrt(*large);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,102 +0,0 @@
*> \brief \b DLABAD
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLABAD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLABAD( SMALL, LARGE )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION LARGE, SMALL
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLABAD takes as input the values computed by DLAMCH for underflow and
*> overflow, and returns the square root of each of these values if the
*> log of LARGE is sufficiently large. This subroutine is intended to
*> identify machines with a large exponent range, such as the Crays, and
*> redefine the underflow and overflow limits to be the square roots of
*> the values computed by DLAMCH. This subroutine is needed because
*> DLAMCH does not compensate for poor arithmetic in the upper half of
*> the exponent range, as is found on a Cray.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in,out] SMALL
*> \verbatim
*> SMALL is DOUBLE PRECISION
*> On entry, the underflow threshold as computed by DLAMCH.
*> On exit, if LOG10(LARGE) is sufficiently large, the square
*> root of SMALL, otherwise unchanged.
*> \endverbatim
*>
*> \param[in,out] LARGE
*> \verbatim
*> LARGE is DOUBLE PRECISION
*> On entry, the overflow threshold as computed by DLAMCH.
*> On exit, if LOG10(LARGE) is sufficiently large, the square
*> root of LARGE, otherwise unchanged.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
SUBROUTINE DLABAD( SMALL, LARGE )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION LARGE, SMALL
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC LOG10, SQRT
* ..
* .. Executable Statements ..
*
* If it looks like we're on a Cray, take the square root of
* SMALL and LARGE to avoid overflow and underflow problems.
*
IF( LOG10( LARGE ).GT.2000.D0 ) THEN
SMALL = SQRT( SMALL )
LARGE = SQRT( LARGE )
END IF
*
RETURN
*
* End of DLABAD
*
END

210
lib/linalg/dlabrd.cpp Normal file
View File

@ -0,0 +1,210 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b4 = -1.;
static doublereal c_b5 = 1.;
static integer c__1 = 1;
static doublereal c_b16 = 0.;
int dlabrd_(integer *m, integer *n, integer *nb, doublereal *a, integer *lda, doublereal *d__,
doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx,
doublereal *y, integer *ldy)
{
integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3;
integer i__;
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
x_dim1 = *ldx;
x_offset = 1 + x_dim1;
x -= x_offset;
y_dim1 = *ldy;
y_offset = 1 + y_dim1;
y -= y_offset;
if (*m <= 0 || *n <= 0) {
return 0;
}
if (*m >= *n) {
i__1 = *nb;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1],
ldy, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1],
&c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
if (i__ < *n) {
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
i__2 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&a[i__ + a_dim1], lda, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&x[i__ + x_dim1], ldx, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)9);
i__2 = *n - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
e[i__] = a[i__ + (i__ + 1) * a_dim1];
a[i__ + (i__ + 1) * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *m - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
}
}
} else {
i__1 = *nb;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1],
lda, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__ + 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1],
ldx, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)9);
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
if (i__ < *m) {
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *n - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1, (ftnlen)9);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__ + 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
&y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
e[i__] = a[i__ + 1 + i__ * a_dim1];
a[i__ + 1 + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dgemv_((char *)"Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,378 +0,0 @@
*> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLABRD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
* LDY )
*
* .. Scalar Arguments ..
* INTEGER LDA, LDX, LDY, M, N, NB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), X( LDX, * ), Y( LDY, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLABRD reduces the first NB rows and columns of a real general
*> m by n matrix A to upper or lower bidiagonal form by an orthogonal
*> transformation Q**T * A * P, and returns the matrices X and Y which
*> are needed to apply the transformation to the unreduced part of A.
*>
*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
*> bidiagonal form.
*>
*> This is an auxiliary routine called by DGEBRD
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A.
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The number of leading rows and columns of A to be reduced.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n general matrix to be reduced.
*> On exit, the first NB rows and columns of the matrix are
*> overwritten; the rest of the array is unchanged.
*> If m >= n, elements on and below the diagonal in the first NB
*> columns, with the array TAUQ, represent the orthogonal
*> matrix Q as a product of elementary reflectors; and
*> elements above the diagonal in the first NB rows, with the
*> array TAUP, represent the orthogonal matrix P as a product
*> of elementary reflectors.
*> If m < n, elements below the diagonal in the first NB
*> columns, with the array TAUQ, represent the orthogonal
*> matrix Q as a product of elementary reflectors, and
*> elements on and above the diagonal in the first NB rows,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (NB)
*> The diagonal elements of the first NB rows and columns of
*> the reduced matrix. D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (NB)
*> The off-diagonal elements of the first NB rows and columns of
*> the reduced matrix.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (NB)
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (NB)
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension (LDX,NB)
*> The m-by-nb matrix X required to update the unreduced part
*> of A.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max(1,M).
*> \endverbatim
*>
*> \param[out] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension (LDY,NB)
*> The n-by-nb matrix Y required to update the unreduced part
*> of A.
*> \endverbatim
*>
*> \param[in] LDY
*> \verbatim
*> LDY is INTEGER
*> The leading dimension of the array Y. LDY >= max(1,N).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors.
*>
*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The elements of the vectors v and u together form the m-by-nb matrix
*> V and the nb-by-n matrix U**T which are needed, with X and Y, to apply
*> the transformation to the unreduced part of the matrix, using a block
*> update of the form: A := A - V*Y**T - X*U**T.
*>
*> The contents of A on exit are illustrated by the following examples
*> with nb = 2:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
*> ( v1 v2 a a a ) ( v1 1 a a a a )
*> ( v1 v2 a a a ) ( v1 v2 a a a a )
*> ( v1 v2 a a a ) ( v1 v2 a a a a )
*> ( v1 v2 a a a )
*>
*> where a denotes an element of the original matrix which is unchanged,
*> vi denotes an element of the vector defining H(i), and ui an element
*> of the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
$ LDY )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER LDA, LDX, LDY, M, N, NB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), X( LDX, * ), Y( LDY, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DGEMV, DLARFG, DSCAL
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LE.0 .OR. N.LE.0 )
$ RETURN
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, NB
*
* Update A(i:m,i)
*
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
IF( I.LT.N ) THEN
A( I, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
$ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
*
* Update A(i,i+1:n)
*
CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
$ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, NB
*
* Update A(i,i:n)
*
CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
$ X( I, 1 ), LDX, ONE, A( I, I ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
IF( I.LT.M ) THEN
A( I, I ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
$ A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
*
* Update A(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
$ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
END IF
20 CONTINUE
END IF
RETURN
*
* End of DLABRD
*
END

136
lib/linalg/dlacn2.cpp Normal file
View File

@ -0,0 +1,136 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase,
integer *isave)
{
integer i__1;
doublereal d__1;
integer i_lmp_dnnt(doublereal *);
integer i__;
doublereal xs, temp;
extern doublereal dasum_(integer *, doublereal *, integer *);
integer jlast;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
extern integer idamax_(integer *, doublereal *, integer *);
doublereal altsgn, estold;
--isave;
--isgn;
--x;
--v;
if (*kase == 0) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = 1. / (doublereal)(*n);
}
*kase = 1;
isave[1] = 1;
return 0;
}
switch (isave[1]) {
case 1:
goto L20;
case 2:
goto L40;
case 3:
goto L70;
case 4:
goto L110;
case 5:
goto L140;
}
L20:
if (*n == 1) {
v[1] = x[1];
*est = abs(v[1]);
goto L150;
}
*est = dasum_(n, &x[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
x[i__] = 1.;
} else {
x[i__] = -1.;
}
isgn[i__] = i_lmp_dnnt(&x[i__]);
}
*kase = 2;
isave[1] = 2;
return 0;
L40:
isave[2] = idamax_(n, &x[1], &c__1);
isave[3] = 2;
L50:
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = 0.;
}
x[isave[2]] = 1.;
*kase = 1;
isave[1] = 3;
return 0;
L70:
dcopy_(n, &x[1], &c__1, &v[1], &c__1);
estold = *est;
*est = dasum_(n, &v[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
xs = 1.;
} else {
xs = -1.;
}
if (i_lmp_dnnt(&xs) != isgn[i__]) {
goto L90;
}
}
goto L120;
L90:
if (*est <= estold) {
goto L120;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
x[i__] = 1.;
} else {
x[i__] = -1.;
}
isgn[i__] = i_lmp_dnnt(&x[i__]);
}
*kase = 2;
isave[1] = 4;
return 0;
L110:
jlast = isave[2];
isave[2] = idamax_(n, &x[1], &c__1);
if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) {
++isave[3];
goto L50;
}
L120:
altsgn = 1.;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = altsgn * ((doublereal)(i__ - 1) / (doublereal)(*n - 1) + 1.);
altsgn = -altsgn;
}
*kase = 1;
isave[1] = 5;
return 0;
L140:
temp = dasum_(n, &x[1], &c__1) / (doublereal)(*n * 3) * 2.;
if (temp > *est) {
dcopy_(n, &x[1], &c__1, &v[1], &c__1);
*est = temp;
}
L150:
*kase = 0;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,304 +0,0 @@
*> \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLACN2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
*
* .. Scalar Arguments ..
* INTEGER KASE, N
* DOUBLE PRECISION EST
* ..
* .. Array Arguments ..
* INTEGER ISGN( * ), ISAVE( 3 )
* DOUBLE PRECISION V( * ), X( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLACN2 estimates the 1-norm of a square, real matrix A.
*> Reverse communication is used for evaluating matrix-vector products.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix. N >= 1.
*> \endverbatim
*>
*> \param[out] V
*> \verbatim
*> V is DOUBLE PRECISION array, dimension (N)
*> On the final return, V = A*W, where EST = norm(V)/norm(W)
*> (W is not returned).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension (N)
*> On an intermediate return, X should be overwritten by
*> A * X, if KASE=1,
*> A**T * X, if KASE=2,
*> and DLACN2 must be re-called with all the other parameters
*> unchanged.
*> \endverbatim
*>
*> \param[out] ISGN
*> \verbatim
*> ISGN is INTEGER array, dimension (N)
*> \endverbatim
*>
*> \param[in,out] EST
*> \verbatim
*> EST is DOUBLE PRECISION
*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be
*> unchanged from the previous call to DLACN2.
*> On exit, EST is an estimate (a lower bound) for norm(A).
*> \endverbatim
*>
*> \param[in,out] KASE
*> \verbatim
*> KASE is INTEGER
*> On the initial call to DLACN2, KASE should be 0.
*> On an intermediate return, KASE will be 1 or 2, indicating
*> whether X should be overwritten by A * X or A**T * X.
*> On the final return from DLACN2, KASE will again be 0.
*> \endverbatim
*>
*> \param[in,out] ISAVE
*> \verbatim
*> ISAVE is INTEGER array, dimension (3)
*> ISAVE is used to save variables between calls to DLACN2
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Originally named SONEST, dated March 16, 1988.
*>
*> This is a thread safe version of DLACON, which uses the array ISAVE
*> in place of a SAVE statement, as follows:
*>
*> DLACON DLACN2
*> JUMP ISAVE(1)
*> J ISAVE(2)
*> ITER ISAVE(3)
*> \endverbatim
*
*> \par Contributors:
* ==================
*>
*> Nick Higham, University of Manchester
*
*> \par References:
* ================
*>
*> N.J. Higham, "FORTRAN codes for estimating the one-norm of
*> a real or complex matrix, with applications to condition estimation",
*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
*>
* =====================================================================
SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KASE, N
DOUBLE PRECISION EST
* ..
* .. Array Arguments ..
INTEGER ISGN( * ), ISAVE( 3 )
DOUBLE PRECISION V( * ), X( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER ITMAX
PARAMETER ( ITMAX = 5 )
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, JLAST
DOUBLE PRECISION ALTSGN, ESTOLD, TEMP, XS
* ..
* .. External Functions ..
INTEGER IDAMAX
DOUBLE PRECISION DASUM
EXTERNAL IDAMAX, DASUM
* ..
* .. External Subroutines ..
EXTERNAL DCOPY
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, NINT
* ..
* .. Executable Statements ..
*
IF( KASE.EQ.0 ) THEN
DO 10 I = 1, N
X( I ) = ONE / DBLE( N )
10 CONTINUE
KASE = 1
ISAVE( 1 ) = 1
RETURN
END IF
*
GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 )
*
* ................ ENTRY (ISAVE( 1 ) = 1)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X.
*
20 CONTINUE
IF( N.EQ.1 ) THEN
V( 1 ) = X( 1 )
EST = ABS( V( 1 ) )
* ... QUIT
GO TO 150
END IF
EST = DASUM( N, X, 1 )
*
DO 30 I = 1, N
IF( X(I).GE.ZERO ) THEN
X(I) = ONE
ELSE
X(I) = -ONE
END IF
ISGN( I ) = NINT( X( I ) )
30 CONTINUE
KASE = 2
ISAVE( 1 ) = 2
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 2)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
*
40 CONTINUE
ISAVE( 2 ) = IDAMAX( N, X, 1 )
ISAVE( 3 ) = 2
*
* MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
*
50 CONTINUE
DO 60 I = 1, N
X( I ) = ZERO
60 CONTINUE
X( ISAVE( 2 ) ) = ONE
KASE = 1
ISAVE( 1 ) = 3
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 3)
* X HAS BEEN OVERWRITTEN BY A*X.
*
70 CONTINUE
CALL DCOPY( N, X, 1, V, 1 )
ESTOLD = EST
EST = DASUM( N, V, 1 )
DO 80 I = 1, N
IF( X(I).GE.ZERO ) THEN
XS = ONE
ELSE
XS = -ONE
END IF
IF( NINT( XS ).NE.ISGN( I ) )
$ GO TO 90
80 CONTINUE
* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
GO TO 120
*
90 CONTINUE
* TEST FOR CYCLING.
IF( EST.LE.ESTOLD )
$ GO TO 120
*
DO 100 I = 1, N
IF( X(I).GE.ZERO ) THEN
X(I) = ONE
ELSE
X(I) = -ONE
END IF
ISGN( I ) = NINT( X( I ) )
100 CONTINUE
KASE = 2
ISAVE( 1 ) = 4
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 4)
* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
*
110 CONTINUE
JLAST = ISAVE( 2 )
ISAVE( 2 ) = IDAMAX( N, X, 1 )
IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND.
$ ( ISAVE( 3 ).LT.ITMAX ) ) THEN
ISAVE( 3 ) = ISAVE( 3 ) + 1
GO TO 50
END IF
*
* ITERATION COMPLETE. FINAL STAGE.
*
120 CONTINUE
ALTSGN = ONE
DO 130 I = 1, N
X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) )
ALTSGN = -ALTSGN
130 CONTINUE
KASE = 1
ISAVE( 1 ) = 5
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 5)
* X HAS BEEN OVERWRITTEN BY A*X.
*
140 CONTINUE
TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) )
IF( TEMP.GT.EST ) THEN
CALL DCOPY( N, X, 1, V, 1 )
EST = TEMP
END IF
*
150 CONTINUE
KASE = 0
RETURN
*
* End of DLACN2
*
END

46
lib/linalg/dlacpy.cpp Normal file
View File

@ -0,0 +1,46 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlacpy_(char *uplo, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b,
integer *ldb, ftnlen uplo_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
integer i__, j;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = min(j, *m);
for (i__ = 1; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
} else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = j; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,153 +0,0 @@
*> \brief \b DLACPY copies all or part of one two-dimensional array to another.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLACPY + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER LDA, LDB, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLACPY copies all or part of a two-dimensional matrix A to another
*> matrix B.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies the part of the matrix A to be copied to B.
*> = 'U': Upper triangular part
*> = 'L': Lower triangular part
*> Otherwise: All of the matrix A
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The m by n matrix A. If UPLO = 'U', only the upper triangle
*> or trapezoid is accessed; if UPLO = 'L', only the lower
*> triangle or trapezoid is accessed.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,N)
*> On exit, B = A in the locations specified by UPLO.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER LDA, LDB, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I, J
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
IF( LSAME( UPLO, 'U' ) ) THEN
DO 20 J = 1, N
DO 10 I = 1, MIN( J, M )
B( I, J ) = A( I, J )
10 CONTINUE
20 CONTINUE
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
DO 40 J = 1, N
DO 30 I = J, M
B( I, J ) = A( I, J )
30 CONTINUE
40 CONTINUE
ELSE
DO 60 J = 1, N
DO 50 I = 1, M
B( I, J ) = A( I, J )
50 CONTINUE
60 CONTINUE
END IF
RETURN
*
* End of DLACPY
*
END

88
lib/linalg/dladiv.cpp Normal file
View File

@ -0,0 +1,88 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
doublereal *q)
{
doublereal d__1, d__2;
doublereal s, aa, ab, bb, cc, cd, dd, be, un, ov, eps;
extern doublereal dlamch_(char *, ftnlen);
extern int dladiv1_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *);
aa = *a;
bb = *b;
cc = *c__;
dd = *d__;
d__1 = abs(*a), d__2 = abs(*b);
ab = max(d__1, d__2);
d__1 = abs(*c__), d__2 = abs(*d__);
cd = max(d__1, d__2);
s = 1.;
ov = dlamch_((char *)"Overflow threshold", (ftnlen)18);
un = dlamch_((char *)"Safe minimum", (ftnlen)12);
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
be = 2. / (eps * eps);
if (ab >= ov * .5) {
aa *= .5;
bb *= .5;
s *= 2.;
}
if (cd >= ov * .5) {
cc *= .5;
dd *= .5;
s *= .5;
}
if (ab <= un * 2. / eps) {
aa *= be;
bb *= be;
s /= be;
}
if (cd <= un * 2. / eps) {
cc *= be;
dd *= be;
s *= be;
}
if (abs(*d__) <= abs(*c__)) {
dladiv1_(&aa, &bb, &cc, &dd, p, q);
} else {
dladiv1_(&bb, &aa, &dd, &cc, p, q);
*q = -(*q);
}
*p *= s;
*q *= s;
return 0;
}
int dladiv1_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
doublereal *q)
{
doublereal r__, t;
extern doublereal dladiv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *);
r__ = *d__ / *c__;
t = 1. / (*c__ + *d__ * r__);
*p = dladiv2_(a, b, c__, d__, &r__, &t);
*a = -(*a);
*q = dladiv2_(b, a, c__, d__, &r__, &t);
return 0;
}
doublereal dladiv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *r__,
doublereal *t)
{
doublereal ret_val;
doublereal br;
if (*r__ != 0.) {
br = *b * *r__;
if (br != 0.) {
ret_val = (*a + br) * *t;
} else {
ret_val = *a * *t + *b * *t * *r__;
}
} else {
ret_val = (*a + *d__ * (*b / *c__)) * *t;
}
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,251 +0,0 @@
*> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLADIV + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLADIV( A, B, C, D, P, Q )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLADIV performs complex division in real arithmetic
*>
*> a + i*b
*> p + i*q = ---------
*> c + i*d
*>
*> The algorithm is due to Michael Baudin and Robert L. Smith
*> and can be found in the paper
*> "A Robust Complex Division in Scilab"
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] C
*> \verbatim
*> C is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION
*> The scalars a, b, c, and d in the above expression.
*> \endverbatim
*>
*> \param[out] P
*> \verbatim
*> P is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[out] Q
*> \verbatim
*> Q is DOUBLE PRECISION
*> The scalars p and q in the above expression.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
* =====================================================================
SUBROUTINE DLADIV( A, B, C, D, P, Q )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION BS
PARAMETER ( BS = 2.0D0 )
DOUBLE PRECISION HALF
PARAMETER ( HALF = 0.5D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLADIV1
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
AA = A
BB = B
CC = C
DD = D
AB = MAX( ABS(A), ABS(B) )
CD = MAX( ABS(C), ABS(D) )
S = 1.0D0
OV = DLAMCH( 'Overflow threshold' )
UN = DLAMCH( 'Safe minimum' )
EPS = DLAMCH( 'Epsilon' )
BE = BS / (EPS*EPS)
IF( AB >= HALF*OV ) THEN
AA = HALF * AA
BB = HALF * BB
S = TWO * S
END IF
IF( CD >= HALF*OV ) THEN
CC = HALF * CC
DD = HALF * DD
S = HALF * S
END IF
IF( AB <= UN*BS/EPS ) THEN
AA = AA * BE
BB = BB * BE
S = S / BE
END IF
IF( CD <= UN*BS/EPS ) THEN
CC = CC * BE
DD = DD * BE
S = S * BE
END IF
IF( ABS( D ).LE.ABS( C ) ) THEN
CALL DLADIV1(AA, BB, CC, DD, P, Q)
ELSE
CALL DLADIV1(BB, AA, DD, CC, P, Q)
Q = -Q
END IF
P = P * S
Q = Q * S
*
RETURN
*
* End of DLADIV
*
END
*> \ingroup doubleOTHERauxiliary
SUBROUTINE DLADIV1( A, B, C, D, P, Q )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION R, T
* ..
* .. External Functions ..
DOUBLE PRECISION DLADIV2
EXTERNAL DLADIV2
* ..
* .. Executable Statements ..
*
R = D / C
T = ONE / (C + D * R)
P = DLADIV2(A, B, C, D, R, T)
A = -A
Q = DLADIV2(B, A, C, D, R, T)
*
RETURN
*
* End of DLADIV1
*
END
*> \ingroup doubleOTHERauxiliary
DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, R, T
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION BR
* ..
* .. Executable Statements ..
*
IF( R.NE.ZERO ) THEN
BR = B * R
IF( BR.NE.ZERO ) THEN
DLADIV2 = (A + BR) * T
ELSE
DLADIV2 = A * T + (B * T) * R
END IF
ELSE
DLADIV2 = (A + D * (B / C)) * T
END IF
*
RETURN
*
* End of DLADIV2
*
END

45
lib/linalg/dlae2.cpp Normal file
View File

@ -0,0 +1,45 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2)
{
doublereal d__1;
double sqrt(doublereal);
doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
sm = *a + *c__;
df = *a - *c__;
adf = abs(df);
tb = *b + *b;
ab = abs(tb);
if (abs(*a) > abs(*c__)) {
acmx = *a;
acmn = *c__;
} else {
acmx = *c__;
acmn = *a;
}
if (adf > ab) {
d__1 = ab / adf;
rt = adf * sqrt(d__1 * d__1 + 1.);
} else if (adf < ab) {
d__1 = adf / ab;
rt = ab * sqrt(d__1 * d__1 + 1.);
} else {
rt = ab * sqrt(2.);
}
if (sm < 0.) {
*rt1 = (sm - rt) * .5;
*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
} else if (sm > 0.) {
*rt1 = (sm + rt) * .5;
*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
} else {
*rt1 = rt * .5;
*rt2 = rt * -.5;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,182 +0,0 @@
*> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAE2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION A, B, C, RT1, RT2
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix
*> [ A B ]
*> [ B C ].
*> On return, RT1 is the eigenvalue of larger absolute value, and RT2
*> is the eigenvalue of smaller absolute value.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION
*> The (1,1) element of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION
*> The (1,2) and (2,1) elements of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[in] C
*> \verbatim
*> C is DOUBLE PRECISION
*> The (2,2) element of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[out] RT1
*> \verbatim
*> RT1 is DOUBLE PRECISION
*> The eigenvalue of larger absolute value.
*> \endverbatim
*>
*> \param[out] RT2
*> \verbatim
*> RT2 is DOUBLE PRECISION
*> The eigenvalue of smaller absolute value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> RT1 is accurate to a few ulps barring over/underflow.
*>
*> RT2 may be inaccurate if there is massive cancellation in the
*> determinant A*C-B*B; higher precision or correctly rounded or
*> correctly truncated arithmetic would be needed to compute RT2
*> accurately in all cases.
*>
*> Overflow is possible only if RT1 is within a factor of 5 of overflow.
*> Underflow is harmless if the input data is 0 or exceeds
*> underflow_threshold / macheps.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, RT1, RT2
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION HALF
PARAMETER ( HALF = 0.5D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION AB, ACMN, ACMX, ADF, DF, RT, SM, TB
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, SQRT
* ..
* .. Executable Statements ..
*
* Compute the eigenvalues
*
SM = A + C
DF = A - C
ADF = ABS( DF )
TB = B + B
AB = ABS( TB )
IF( ABS( A ).GT.ABS( C ) ) THEN
ACMX = A
ACMN = C
ELSE
ACMX = C
ACMN = A
END IF
IF( ADF.GT.AB ) THEN
RT = ADF*SQRT( ONE+( AB / ADF )**2 )
ELSE IF( ADF.LT.AB ) THEN
RT = AB*SQRT( ONE+( ADF / AB )**2 )
ELSE
*
* Includes case AB=ADF=0
*
RT = AB*SQRT( TWO )
END IF
IF( SM.LT.ZERO ) THEN
RT1 = HALF*( SM-RT )
*
* Order of execution important.
* To get fully accurate smaller eigenvalue,
* next line needs to be executed in higher precision.
*
RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
ELSE IF( SM.GT.ZERO ) THEN
RT1 = HALF*( SM+RT )
*
* Order of execution important.
* To get fully accurate smaller eigenvalue,
* next line needs to be executed in higher precision.
*
RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
ELSE
*
* Includes case RT1 = RT2 = 0
*
RT1 = HALF*RT
RT2 = -HALF*RT
END IF
RETURN
*
* End of DLAE2
*
END

236
lib/linalg/dlaed0.cpp Normal file
View File

@ -0,0 +1,236 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__9 = 9;
static integer c__0 = 0;
static integer c__2 = 2;
static doublereal c_b23 = 1.;
static doublereal c_b24 = 0.;
static integer c__1 = 1;
int dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e,
doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work,
integer *iwork, integer *info)
{
integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
doublereal d__1;
double log(doublereal);
integer pow_lmp_ii(integer *, integer *);
integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
doublereal temp;
integer curr;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iperm;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer indxq, iwrem;
extern int dlaed1_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, integer *);
integer iqptr;
extern int dlaed7_(integer *, integer *, integer *, integer *, integer *, integer *,
doublereal *, doublereal *, integer *, integer *, doublereal *, integer *,
doublereal *, integer *, integer *, integer *, integer *, integer *,
doublereal *, doublereal *, integer *, integer *);
integer tlvls;
extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen);
integer igivcl;
extern int xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer igivnm, submat, curprb, subpbs, igivpt;
extern int dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *,
doublereal *, integer *, ftnlen);
integer curlvl, matsiz, iprmpt, smlsiz;
--d__;
--e;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
qstore_dim1 = *ldqs;
qstore_offset = 1 + qstore_dim1;
qstore -= qstore_offset;
--work;
--iwork;
*info = 0;
if (*icompq < 0 || *icompq > 2) {
*info = -1;
} else if (*icompq == 1 && *qsiz < max(0, *n)) {
*info = -2;
} else if (*n < 0) {
*info = -3;
} else if (*ldq < max(1, *n)) {
*info = -7;
} else if (*ldqs < max(1, *n)) {
*info = -9;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED0", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
smlsiz = ilaenv_(&c__9, (char *)"DLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
iwork[1] = *n;
subpbs = 1;
tlvls = 0;
L10:
if (iwork[subpbs] > smlsiz) {
for (j = subpbs; j >= 1; --j) {
iwork[j * 2] = (iwork[j] + 1) / 2;
iwork[(j << 1) - 1] = iwork[j] / 2;
}
++tlvls;
subpbs <<= 1;
goto L10;
}
i__1 = subpbs;
for (j = 2; j <= i__1; ++j) {
iwork[j] += iwork[j - 1];
}
spm1 = subpbs - 1;
i__1 = spm1;
for (i__ = 1; i__ <= i__1; ++i__) {
submat = iwork[i__] + 1;
smm1 = submat - 1;
d__[smm1] -= (d__1 = e[smm1], abs(d__1));
d__[submat] -= (d__1 = e[smm1], abs(d__1));
}
indxq = (*n << 2) + 3;
if (*icompq != 2) {
temp = log((doublereal)(*n)) / log(2.);
lgn = (integer)temp;
if (pow_lmp_ii(&c__2, &lgn) < *n) {
++lgn;
}
if (pow_lmp_ii(&c__2, &lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
iperm = iprmpt + *n * lgn;
iqptr = iperm + *n * lgn;
igivpt = iqptr + *n + 2;
igivcl = igivpt + *n * lgn;
igivnm = 1;
iq = igivnm + (*n << 1) * lgn;
i__1 = *n;
iwrem = iq + i__1 * i__1 + 1;
i__1 = subpbs;
for (i__ = 0; i__ <= i__1; ++i__) {
iwork[iprmpt + i__] = 1;
iwork[igivpt + i__] = 1;
}
iwork[iqptr] = 1;
}
curr = 0;
i__1 = spm1;
for (i__ = 0; i__ <= i__1; ++i__) {
if (i__ == 0) {
submat = 1;
matsiz = iwork[1];
} else {
submat = iwork[i__] + 1;
matsiz = iwork[i__ + 1] - iwork[i__];
}
if (*icompq == 2) {
dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq,
&work[1], info, (ftnlen)1);
if (*info != 0) {
goto L130;
}
} else {
dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]],
&matsiz, &work[1], info, (ftnlen)1);
if (*info != 0) {
goto L130;
}
if (*icompq == 1) {
dgemm_((char *)"N", (char *)"N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * q_dim1 + 1], ldq,
&work[iq - 1 + iwork[iqptr + curr]], &matsiz, &c_b24,
&qstore[submat * qstore_dim1 + 1], ldqs, (ftnlen)1, (ftnlen)1);
}
i__2 = matsiz;
iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
++curr;
}
k = 1;
i__2 = iwork[i__ + 1];
for (j = submat; j <= i__2; ++j) {
iwork[indxq + j] = k;
++k;
}
}
curlvl = 1;
L80:
if (subpbs > 1) {
spm2 = subpbs - 2;
i__1 = spm2;
for (i__ = 0; i__ <= i__1; i__ += 2) {
if (i__ == 0) {
submat = 1;
matsiz = iwork[2];
msd2 = iwork[1];
curprb = 0;
} else {
submat = iwork[i__] + 1;
matsiz = iwork[i__ + 2] - iwork[i__];
msd2 = matsiz / 2;
++curprb;
}
if (*icompq == 2) {
dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq,
&iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &work[1],
&iwork[subpbs + 1], info);
} else {
dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[submat],
&qstore[submat * qstore_dim1 + 1], ldqs, &iwork[indxq + submat],
&e[submat + msd2 - 1], &msd2, &work[iq], &iwork[iqptr], &iwork[iprmpt],
&iwork[iperm], &iwork[igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem],
&iwork[subpbs + 1], info);
}
if (*info != 0) {
goto L130;
}
iwork[i__ / 2 + 1] = iwork[i__ + 2];
}
subpbs /= 2;
++curlvl;
goto L80;
}
if (*icompq == 1) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1);
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
} else if (*icompq == 2) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
dlacpy_((char *)"A", n, n, &work[*n + 1], n, &q[q_offset], ldq, (ftnlen)1);
} else {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
}
goto L140;
L130:
*info = submat * (*n + 1) + submat + matsiz - 1;
L140:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,431 +0,0 @@
*> \brief \b DLAED0 used by DSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED0 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
* WORK, IWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
* $ WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED0 computes all eigenvalues and corresponding eigenvectors of a
*> symmetric tridiagonal matrix using the divide and conquer method.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] ICOMPQ
*> \verbatim
*> ICOMPQ is INTEGER
*> = 0: Compute eigenvalues only.
*> = 1: Compute eigenvectors of original dense symmetric matrix
*> also. On entry, Q contains the orthogonal matrix used
*> to reduce the original matrix to tridiagonal form.
*> = 2: Compute eigenvalues and eigenvectors of tridiagonal
*> matrix.
*> \endverbatim
*>
*> \param[in] QSIZ
*> \verbatim
*> QSIZ is INTEGER
*> The dimension of the orthogonal matrix used to reduce
*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the main diagonal of the tridiagonal matrix.
*> On exit, its eigenvalues.
*> \endverbatim
*>
*> \param[in] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (N-1)
*> The off-diagonal elements of the tridiagonal matrix.
*> On exit, E has been destroyed.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
*> On entry, Q must contain an N-by-N orthogonal matrix.
*> If ICOMPQ = 0 Q is not referenced.
*> If ICOMPQ = 1 On entry, Q is a subset of the columns of the
*> orthogonal matrix used to reduce the full
*> matrix to tridiagonal form corresponding to
*> the subset of the full matrix which is being
*> decomposed at this time.
*> If ICOMPQ = 2 On entry, Q will be the identity matrix.
*> On exit, Q contains the eigenvectors of the
*> tridiagonal matrix.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. If eigenvectors are
*> desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
*> \endverbatim
*>
*> \param[out] QSTORE
*> \verbatim
*> QSTORE is DOUBLE PRECISION array, dimension (LDQS, N)
*> Referenced only when ICOMPQ = 1. Used to store parts of
*> the eigenvector matrix when the updating matrix multiplies
*> take place.
*> \endverbatim
*>
*> \param[in] LDQS
*> \verbatim
*> LDQS is INTEGER
*> The leading dimension of the array QSTORE. If ICOMPQ = 1,
*> then LDQS >= max(1,N). In any case, LDQS >= 1.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array,
*> If ICOMPQ = 0 or 1, the dimension of WORK must be at least
*> 1 + 3*N + 2*N*lg N + 3*N**2
*> ( lg( N ) = smallest integer k
*> such that 2^k >= N )
*> If ICOMPQ = 2, the dimension of WORK must be at least
*> 4*N + N**2.
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array,
*> If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
*> 6 + 6*N + 5*N*lg N.
*> ( lg( N ) = smallest integer k
*> such that 2^k >= N )
*> If ICOMPQ = 2, the dimension of IWORK must be at least
*> 3 + 5*N.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: The algorithm failed to compute an eigenvalue while
*> working on the submatrix lying in rows and columns
*> INFO/(N+1) through mod(INFO,N+1).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA
*
* =====================================================================
SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
$ WORK, IWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
$ WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 )
* ..
* .. Local Scalars ..
INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
$ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
$ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
$ SPM2, SUBMAT, SUBPBS, TLVLS
DOUBLE PRECISION TEMP
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR,
$ XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, INT, LOG, MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
INFO = -1
ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
INFO = -2
ELSE IF( N.LT.0 ) THEN
INFO = -3
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -7
ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
INFO = -9
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED0', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 )
*
* Determine the size and placement of the submatrices, and save in
* the leading elements of IWORK.
*
IWORK( 1 ) = N
SUBPBS = 1
TLVLS = 0
10 CONTINUE
IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
DO 20 J = SUBPBS, 1, -1
IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
IWORK( 2*J-1 ) = IWORK( J ) / 2
20 CONTINUE
TLVLS = TLVLS + 1
SUBPBS = 2*SUBPBS
GO TO 10
END IF
DO 30 J = 2, SUBPBS
IWORK( J ) = IWORK( J ) + IWORK( J-1 )
30 CONTINUE
*
* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
* using rank-1 modifications (cuts).
*
SPM1 = SUBPBS - 1
DO 40 I = 1, SPM1
SUBMAT = IWORK( I ) + 1
SMM1 = SUBMAT - 1
D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
40 CONTINUE
*
INDXQ = 4*N + 3
IF( ICOMPQ.NE.2 ) THEN
*
* Set up workspaces for eigenvalues only/accumulate new vectors
* routine
*
TEMP = LOG( DBLE( N ) ) / LOG( TWO )
LGN = INT( TEMP )
IF( 2**LGN.LT.N )
$ LGN = LGN + 1
IF( 2**LGN.LT.N )
$ LGN = LGN + 1
IPRMPT = INDXQ + N + 1
IPERM = IPRMPT + N*LGN
IQPTR = IPERM + N*LGN
IGIVPT = IQPTR + N + 2
IGIVCL = IGIVPT + N*LGN
*
IGIVNM = 1
IQ = IGIVNM + 2*N*LGN
IWREM = IQ + N**2 + 1
*
* Initialize pointers
*
DO 50 I = 0, SUBPBS
IWORK( IPRMPT+I ) = 1
IWORK( IGIVPT+I ) = 1
50 CONTINUE
IWORK( IQPTR ) = 1
END IF
*
* Solve each submatrix eigenproblem at the bottom of the divide and
* conquer tree.
*
CURR = 0
DO 70 I = 0, SPM1
IF( I.EQ.0 ) THEN
SUBMAT = 1
MATSIZ = IWORK( 1 )
ELSE
SUBMAT = IWORK( I ) + 1
MATSIZ = IWORK( I+1 ) - IWORK( I )
END IF
IF( ICOMPQ.EQ.2 ) THEN
CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
$ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
IF( INFO.NE.0 )
$ GO TO 130
ELSE
CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
$ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 130
IF( ICOMPQ.EQ.1 ) THEN
CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
$ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
$ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
$ LDQS )
END IF
IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
CURR = CURR + 1
END IF
K = 1
DO 60 J = SUBMAT, IWORK( I+1 )
IWORK( INDXQ+J ) = K
K = K + 1
60 CONTINUE
70 CONTINUE
*
* Successively merge eigensystems of adjacent submatrices
* into eigensystem for the corresponding larger matrix.
*
* while ( SUBPBS > 1 )
*
CURLVL = 1
80 CONTINUE
IF( SUBPBS.GT.1 ) THEN
SPM2 = SUBPBS - 2
DO 90 I = 0, SPM2, 2
IF( I.EQ.0 ) THEN
SUBMAT = 1
MATSIZ = IWORK( 2 )
MSD2 = IWORK( 1 )
CURPRB = 0
ELSE
SUBMAT = IWORK( I ) + 1
MATSIZ = IWORK( I+2 ) - IWORK( I )
MSD2 = MATSIZ / 2
CURPRB = CURPRB + 1
END IF
*
* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
* into an eigensystem of size MATSIZ.
* DLAED1 is used only for the full eigensystem of a tridiagonal
* matrix.
* DLAED7 handles the cases in which eigenvalues only or eigenvalues
* and eigenvectors of a full symmetric matrix (which was reduced to
* tridiagonal form) are desired.
*
IF( ICOMPQ.EQ.2 ) THEN
CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
$ LDQ, IWORK( INDXQ+SUBMAT ),
$ E( SUBMAT+MSD2-1 ), MSD2, WORK,
$ IWORK( SUBPBS+1 ), INFO )
ELSE
CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
$ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
$ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
$ MSD2, WORK( IQ ), IWORK( IQPTR ),
$ IWORK( IPRMPT ), IWORK( IPERM ),
$ IWORK( IGIVPT ), IWORK( IGIVCL ),
$ WORK( IGIVNM ), WORK( IWREM ),
$ IWORK( SUBPBS+1 ), INFO )
END IF
IF( INFO.NE.0 )
$ GO TO 130
IWORK( I / 2+1 ) = IWORK( I+2 )
90 CONTINUE
SUBPBS = SUBPBS / 2
CURLVL = CURLVL + 1
GO TO 80
END IF
*
* end while
*
* Re-merge the eigenvalues/vectors which were deflated at the final
* merge step.
*
IF( ICOMPQ.EQ.1 ) THEN
DO 100 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
100 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
ELSE IF( ICOMPQ.EQ.2 ) THEN
DO 110 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
110 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
ELSE
DO 120 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
120 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
END IF
GO TO 140
*
130 CONTINUE
INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
*
140 CONTINUE
RETURN
*
* End of DLAED0
*
END

90
lib/linalg/dlaed1.cpp Normal file
View File

@ -0,0 +1,90 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
int dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer indxp;
extern int dlaed2_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *, integer *, integer *, integer *, integer *),
dlaed3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *,
doublereal *, integer *);
integer idlmda;
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen);
integer coltyp;
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--work;
--iwork;
*info = 0;
if (*n < 0) {
*info = -1;
} else if (*ldq < max(1, *n)) {
*info = -4;
} else {
i__1 = 1, i__2 = *n / 2;
if (min(i__1, i__2) > *cutpnt || *n / 2 < *cutpnt) {
*info = -7;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED1", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
iz = 1;
idlmda = iz + *n;
iw = idlmda + *n;
iq2 = iw + *n;
indx = 1;
indxc = indx + *n;
coltyp = indxc + *n;
indxp = coltyp + *n;
dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
zpp1 = *cutpnt + 1;
i__1 = *n - *cutpnt;
dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[iz], &work[idlmda],
&work[iw], &work[iq2], &iwork[indx], &iwork[indxc], &iwork[indxp], &iwork[coltyp],
info);
if (*info != 0) {
goto L20;
}
if (k != 0) {
is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt +
(iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2],
&iwork[indxc], &iwork[coltyp], &work[iw], &work[is], info);
if (*info != 0) {
goto L20;
}
n1 = k;
n2 = *n - k;
dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
} else {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indxq[i__] = i__;
}
}
L20:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,271 +0,0 @@
*> \brief \b DLAED1 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED1 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
* INFO )
*
* .. Scalar Arguments ..
* INTEGER CUTPNT, INFO, LDQ, N
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER INDXQ( * ), IWORK( * )
* DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED1 computes the updated eigensystem of a diagonal
*> matrix after modification by a rank-one symmetric matrix. This
*> routine is used only for the eigenproblem which requires all
*> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles
*> the case in which eigenvalues only or eigenvalues and eigenvectors
*> of a full symmetric matrix (which was reduced to tridiagonal form)
*> are desired.
*>
*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out)
*>
*> where Z = Q**T*u, u is a vector of length N with ones in the
*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
*>
*> The eigenvectors of the original matrix are stored in Q, and the
*> eigenvalues are in D. The algorithm consists of three stages:
*>
*> The first stage consists of deflating the size of the problem
*> when there are multiple eigenvalues or if there is a zero in
*> the Z vector. For each such occurrence the dimension of the
*> secular equation problem is reduced by one. This stage is
*> performed by the routine DLAED2.
*>
*> The second stage consists of calculating the updated
*> eigenvalues. This is done by finding the roots of the secular
*> equation via the routine DLAED4 (as called by DLAED3).
*> This routine also calculates the eigenvectors of the current
*> problem.
*>
*> The final stage consists of computing the updated eigenvectors
*> directly using the updated eigenvalues. The eigenvectors for
*> the current problem are multiplied with the eigenvectors from
*> the overall problem.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the eigenvalues of the rank-1-perturbed matrix.
*> On exit, the eigenvalues of the repaired matrix.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ,N)
*> On entry, the eigenvectors of the rank-1-perturbed matrix.
*> On exit, the eigenvectors of the repaired tridiagonal matrix.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in,out] INDXQ
*> \verbatim
*> INDXQ is INTEGER array, dimension (N)
*> On entry, the permutation which separately sorts the two
*> subproblems in D into ascending order.
*> On exit, the permutation which will reintegrate the
*> subproblems back into sorted order,
*> i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The subdiagonal entry used to create the rank-1 modification.
*> \endverbatim
*>
*> \param[in] CUTPNT
*> \verbatim
*> CUTPNT is INTEGER
*> The location of the last eigenvalue in the leading sub-matrix.
*> min(1,N) <= CUTPNT <= N/2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*N + N**2)
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (4*N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: if INFO = 1, an eigenvalue did not converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER CUTPNT, INFO, LDQ, N
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER INDXQ( * ), IWORK( * )
DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS,
$ IW, IZ, K, N1, N2, ZPP1
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED1', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* The following values are integer pointers which indicate
* the portion of the workspace
* used by a particular array in DLAED2 and DLAED3.
*
IZ = 1
IDLMDA = IZ + N
IW = IDLMDA + N
IQ2 = IW + N
*
INDX = 1
INDXC = INDX + N
COLTYP = INDXC + N
INDXP = COLTYP + N
*
*
* Form the z-vector which consists of the last row of Q_1 and the
* first row of Q_2.
*
CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
ZPP1 = CUTPNT + 1
CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
*
* Deflate eigenvalues.
*
CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
$ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
$ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
$ IWORK( COLTYP ), INFO )
*
IF( INFO.NE.0 )
$ GO TO 20
*
* Solve Secular Equation.
*
IF( K.NE.0 ) THEN
IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
$ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
$ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
$ WORK( IW ), WORK( IS ), INFO )
IF( INFO.NE.0 )
$ GO TO 20
*
* Prepare the INDXQ sorting permutation.
*
N1 = K
N2 = N - K
CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
ELSE
DO 10 I = 1, N
INDXQ( I ) = I
10 CONTINUE
END IF
*
20 CONTINUE
RETURN
*
* End of DLAED1
*
END

263
lib/linalg/dlaed2.cpp Normal file
View File

@ -0,0 +1,263 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b3 = -1.;
static integer c__1 = 1;
int dlaed2_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w,
doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp,
integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
doublereal d__1, d__2, d__3, d__4;
double sqrt(doublereal);
doublereal c__;
integer i__, j;
doublereal s, t;
integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
doublereal eps, tau, tol;
integer psm[4], imax, jmax;
extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
doublereal *);
integer ctot[4];
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--z__;
--dlamda;
--w;
--q2;
--indx;
--indxc;
--indxp;
--coltyp;
*info = 0;
if (*n < 0) {
*info = -2;
} else if (*ldq < max(1, *n)) {
*info = -6;
} else {
i__1 = 1, i__2 = *n / 2;
if (min(i__1, i__2) > *n1 || *n / 2 < *n1) {
*info = -3;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED2", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
n2 = *n - *n1;
n1p1 = *n1 + 1;
if (*rho < 0.) {
dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
}
t = 1. / sqrt(2.);
dscal_(n, &t, &z__[1], &c__1);
*rho = (d__1 = *rho * 2., abs(d__1));
i__1 = *n;
for (i__ = n1p1; i__ <= i__1; ++i__) {
indxq[i__] += *n1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dlamda[i__] = d__[indxq[i__]];
}
dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indx[i__] = indxq[indxc[i__]];
}
imax = idamax_(n, &z__[1], &c__1);
jmax = idamax_(n, &d__[1], &c__1);
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2));
tol = eps * 8. * max(d__3, d__4);
if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
*k = 0;
iq2 = 1;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__ = indx[j];
dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
dlamda[j] = d__[i__];
iq2 += *n;
}
dlacpy_((char *)"A", n, n, &q2[1], n, &q[q_offset], ldq, (ftnlen)1);
dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
goto L190;
}
i__1 = *n1;
for (i__ = 1; i__ <= i__1; ++i__) {
coltyp[i__] = 1;
}
i__1 = *n;
for (i__ = n1p1; i__ <= i__1; ++i__) {
coltyp[i__] = 3;
}
*k = 0;
k2 = *n + 1;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
nj = indx[j];
if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
--k2;
coltyp[nj] = 4;
indxp[k2] = nj;
if (j == *n) {
goto L100;
}
} else {
pj = nj;
goto L80;
}
}
L80:
++j;
nj = indx[j];
if (j > *n) {
goto L100;
}
if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
--k2;
coltyp[nj] = 4;
indxp[k2] = nj;
} else {
s = z__[pj];
c__ = z__[nj];
tau = dlapy2_(&c__, &s);
t = d__[nj] - d__[pj];
c__ /= tau;
s = -s / tau;
if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
z__[nj] = tau;
z__[pj] = 0.;
if (coltyp[nj] != coltyp[pj]) {
coltyp[nj] = 2;
}
coltyp[pj] = 4;
drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &c__, &s);
d__1 = c__;
d__2 = s;
t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
d__1 = s;
d__2 = c__;
d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
d__[pj] = t;
--k2;
i__ = 1;
L90:
if (k2 + i__ <= *n) {
if (d__[pj] < d__[indxp[k2 + i__]]) {
indxp[k2 + i__ - 1] = indxp[k2 + i__];
indxp[k2 + i__] = pj;
++i__;
goto L90;
} else {
indxp[k2 + i__ - 1] = pj;
}
} else {
indxp[k2 + i__ - 1] = pj;
}
pj = nj;
} else {
++(*k);
dlamda[*k] = d__[pj];
w[*k] = z__[pj];
indxp[*k] = pj;
pj = nj;
}
}
goto L80;
L100:
++(*k);
dlamda[*k] = d__[pj];
w[*k] = z__[pj];
indxp[*k] = pj;
for (j = 1; j <= 4; ++j) {
ctot[j - 1] = 0;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
ct = coltyp[j];
++ctot[ct - 1];
}
psm[0] = 1;
psm[1] = ctot[0] + 1;
psm[2] = psm[1] + ctot[1];
psm[3] = psm[2] + ctot[2];
*k = *n - ctot[3];
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
js = indxp[j];
ct = coltyp[js];
indx[psm[ct - 1]] = js;
indxc[psm[ct - 1]] = j;
++psm[ct - 1];
}
i__ = 1;
iq1 = 1;
iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
i__1 = ctot[0];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
z__[i__] = d__[js];
++i__;
iq1 += *n1;
}
i__1 = ctot[1];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
z__[i__] = d__[js];
++i__;
iq1 += *n1;
iq2 += n2;
}
i__1 = ctot[2];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
z__[i__] = d__[js];
++i__;
iq2 += n2;
}
iq1 = iq2;
i__1 = ctot[3];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
iq2 += *n;
z__[i__] = d__[js];
++i__;
}
if (*k < *n) {
dlacpy_((char *)"A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq, (ftnlen)1);
i__1 = *n - *k;
dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
}
for (j = 1; j <= 4; ++j) {
coltyp[j] = ctot[j - 1];
}
L190:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,536 +0,0 @@
*> \brief \b DLAED2 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, K, LDQ, N, N1
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
* $ INDXQ( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* $ W( * ), Z( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED2 merges the two sets of eigenvalues together into a single
*> sorted set. Then it tries to deflate the size of the problem.
*> There are two ways in which deflation can occur: when two or more
*> eigenvalues are close together or if there is a tiny entry in the
*> Z vector. For each such occurrence the order of the related secular
*> equation problem is reduced by one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[out] K
*> \verbatim
*> K is INTEGER
*> The number of non-deflated eigenvalues, and the order of the
*> related secular equation. 0 <= K <=N.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in] N1
*> \verbatim
*> N1 is INTEGER
*> The location of the last eigenvalue in the leading sub-matrix.
*> min(1,N) <= N1 <= N/2.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, D contains the eigenvalues of the two submatrices to
*> be combined.
*> On exit, D contains the trailing (N-K) updated eigenvalues
*> (those which were deflated) sorted into increasing order.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
*> On entry, Q contains the eigenvectors of two submatrices in
*> the two square blocks with corners at (1,1), (N1,N1)
*> and (N1+1, N1+1), (N,N).
*> On exit, Q contains the trailing (N-K) updated eigenvectors
*> (those which were deflated) in its last N-K columns.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in,out] INDXQ
*> \verbatim
*> INDXQ is INTEGER array, dimension (N)
*> The permutation which separately sorts the two sub-problems
*> in D into ascending order. Note that elements in the second
*> half of this permutation must first have N1 added to their
*> values. Destroyed on exit.
*> \endverbatim
*>
*> \param[in,out] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> On entry, the off-diagonal element associated with the rank-1
*> cut which originally split the two submatrices which are now
*> being recombined.
*> On exit, RHO has been modified to the value required by
*> DLAED3.
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (N)
*> On entry, Z contains the updating vector (the last
*> row of the first sub-eigenvector matrix and the first row of
*> the second sub-eigenvector matrix).
*> On exit, the contents of Z have been destroyed by the updating
*> process.
*> \endverbatim
*>
*> \param[out] DLAMDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (N)
*> A copy of the first K eigenvalues which will be used by
*> DLAED3 to form the secular equation.
*> \endverbatim
*>
*> \param[out] W
*> \verbatim
*> W is DOUBLE PRECISION array, dimension (N)
*> The first k values of the final deflation-altered z-vector
*> which will be passed to DLAED3.
*> \endverbatim
*>
*> \param[out] Q2
*> \verbatim
*> Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
*> A copy of the first K eigenvectors which will be used by
*> DLAED3 in a matrix multiply (DGEMM) to solve for the new
*> eigenvectors.
*> \endverbatim
*>
*> \param[out] INDX
*> \verbatim
*> INDX is INTEGER array, dimension (N)
*> The permutation used to sort the contents of DLAMDA into
*> ascending order.
*> \endverbatim
*>
*> \param[out] INDXC
*> \verbatim
*> INDXC is INTEGER array, dimension (N)
*> The permutation used to arrange the columns of the deflated
*> Q matrix into three groups: the first group contains non-zero
*> elements only at and above N1, the second contains
*> non-zero elements only below N1, and the third is dense.
*> \endverbatim
*>
*> \param[out] INDXP
*> \verbatim
*> INDXP is INTEGER array, dimension (N)
*> The permutation used to place deflated values of D at the end
*> of the array. INDXP(1:K) points to the nondeflated D-values
*> and INDXP(K+1:N) points to the deflated eigenvalues.
*> \endverbatim
*>
*> \param[out] COLTYP
*> \verbatim
*> COLTYP is INTEGER array, dimension (N)
*> During execution, a label which will indicate which of the
*> following types a column in the Q2 matrix is:
*> 1 : non-zero in the upper half only;
*> 2 : dense;
*> 3 : non-zero in the lower half only;
*> 4 : deflated.
*> On exit, COLTYP(i) is the number of columns of type i,
*> for i=1 to 4 only.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
$ Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, K, LDQ, N, N1
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
$ INDXQ( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
$ W( * ), Z( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT
PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
$ TWO = 2.0D0, EIGHT = 8.0D0 )
* ..
* .. Local Arrays ..
INTEGER CTOT( 4 ), PSM( 4 )
* ..
* .. Local Scalars ..
INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
$ N2, NJ, PJ
DOUBLE PRECISION C, EPS, S, T, TAU, TOL
* ..
* .. External Functions ..
INTEGER IDAMAX
DOUBLE PRECISION DLAMCH, DLAPY2
EXTERNAL IDAMAX, DLAMCH, DLAPY2
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -6
ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
INFO = -3
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
N2 = N - N1
N1P1 = N1 + 1
*
IF( RHO.LT.ZERO ) THEN
CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
END IF
*
* Normalize z so that norm(z) = 1. Since z is the concatenation of
* two normalized vectors, norm2(z) = sqrt(2).
*
T = ONE / SQRT( TWO )
CALL DSCAL( N, T, Z, 1 )
*
* RHO = ABS( norm(z)**2 * RHO )
*
RHO = ABS( TWO*RHO )
*
* Sort the eigenvalues into increasing order
*
DO 10 I = N1P1, N
INDXQ( I ) = INDXQ( I ) + N1
10 CONTINUE
*
* re-integrate the deflated parts from the last pass
*
DO 20 I = 1, N
DLAMDA( I ) = D( INDXQ( I ) )
20 CONTINUE
CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
DO 30 I = 1, N
INDX( I ) = INDXQ( INDXC( I ) )
30 CONTINUE
*
* Calculate the allowable deflation tolerance
*
IMAX = IDAMAX( N, Z, 1 )
JMAX = IDAMAX( N, D, 1 )
EPS = DLAMCH( 'Epsilon' )
TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
*
* If the rank-1 modifier is small enough, no more needs to be done
* except to reorganize Q so that its columns correspond with the
* elements in D.
*
IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
K = 0
IQ2 = 1
DO 40 J = 1, N
I = INDX( J )
CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
DLAMDA( J ) = D( I )
IQ2 = IQ2 + N
40 CONTINUE
CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
CALL DCOPY( N, DLAMDA, 1, D, 1 )
GO TO 190
END IF
*
* If there are multiple eigenvalues then the problem deflates. Here
* the number of equal eigenvalues are found. As each equal
* eigenvalue is found, an elementary reflector is computed to rotate
* the corresponding eigensubspace so that the corresponding
* components of Z are zero in this new basis.
*
DO 50 I = 1, N1
COLTYP( I ) = 1
50 CONTINUE
DO 60 I = N1P1, N
COLTYP( I ) = 3
60 CONTINUE
*
*
K = 0
K2 = N + 1
DO 70 J = 1, N
NJ = INDX( J )
IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
COLTYP( NJ ) = 4
INDXP( K2 ) = NJ
IF( J.EQ.N )
$ GO TO 100
ELSE
PJ = NJ
GO TO 80
END IF
70 CONTINUE
80 CONTINUE
J = J + 1
NJ = INDX( J )
IF( J.GT.N )
$ GO TO 100
IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
COLTYP( NJ ) = 4
INDXP( K2 ) = NJ
ELSE
*
* Check if eigenvalues are close enough to allow deflation.
*
S = Z( PJ )
C = Z( NJ )
*
* Find sqrt(a**2+b**2) without overflow or
* destructive underflow.
*
TAU = DLAPY2( C, S )
T = D( NJ ) - D( PJ )
C = C / TAU
S = -S / TAU
IF( ABS( T*C*S ).LE.TOL ) THEN
*
* Deflation is possible.
*
Z( NJ ) = TAU
Z( PJ ) = ZERO
IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
$ COLTYP( NJ ) = 2
COLTYP( PJ ) = 4
CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
T = D( PJ )*C**2 + D( NJ )*S**2
D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
D( PJ ) = T
K2 = K2 - 1
I = 1
90 CONTINUE
IF( K2+I.LE.N ) THEN
IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
INDXP( K2+I-1 ) = INDXP( K2+I )
INDXP( K2+I ) = PJ
I = I + 1
GO TO 90
ELSE
INDXP( K2+I-1 ) = PJ
END IF
ELSE
INDXP( K2+I-1 ) = PJ
END IF
PJ = NJ
ELSE
K = K + 1
DLAMDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
PJ = NJ
END IF
END IF
GO TO 80
100 CONTINUE
*
* Record the last eigenvalue.
*
K = K + 1
DLAMDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
*
* Count up the total number of the various types of columns, then
* form a permutation which positions the four column types into
* four uniform groups (although one or more of these groups may be
* empty).
*
DO 110 J = 1, 4
CTOT( J ) = 0
110 CONTINUE
DO 120 J = 1, N
CT = COLTYP( J )
CTOT( CT ) = CTOT( CT ) + 1
120 CONTINUE
*
* PSM(*) = Position in SubMatrix (of types 1 through 4)
*
PSM( 1 ) = 1
PSM( 2 ) = 1 + CTOT( 1 )
PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
K = N - CTOT( 4 )
*
* Fill out the INDXC array so that the permutation which it induces
* will place all type-1 columns first, all type-2 columns next,
* then all type-3's, and finally all type-4's.
*
DO 130 J = 1, N
JS = INDXP( J )
CT = COLTYP( JS )
INDX( PSM( CT ) ) = JS
INDXC( PSM( CT ) ) = J
PSM( CT ) = PSM( CT ) + 1
130 CONTINUE
*
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
* and Q2 respectively. The eigenvalues/vectors which were not
* deflated go into the first K slots of DLAMDA and Q2 respectively,
* while those which were deflated go into the last N - K slots.
*
I = 1
IQ1 = 1
IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
DO 140 J = 1, CTOT( 1 )
JS = INDX( I )
CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ1 = IQ1 + N1
140 CONTINUE
*
DO 150 J = 1, CTOT( 2 )
JS = INDX( I )
CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ1 = IQ1 + N1
IQ2 = IQ2 + N2
150 CONTINUE
*
DO 160 J = 1, CTOT( 3 )
JS = INDX( I )
CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ2 = IQ2 + N2
160 CONTINUE
*
IQ1 = IQ2
DO 170 J = 1, CTOT( 4 )
JS = INDX( I )
CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
IQ2 = IQ2 + N
Z( I ) = D( JS )
I = I + 1
170 CONTINUE
*
* The deflated eigenvalues and their corresponding vectors go back
* into the last N - K slots of D and Q respectively.
*
IF( K.LT.N ) THEN
CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
$ Q( 1, K+1 ), LDQ )
CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
END IF
*
* Copy CTOT into COLTYP for referencing in DLAED3.
*
DO 180 J = 1, 4
COLTYP( J ) = CTOT( J )
180 CONTINUE
*
190 CONTINUE
RETURN
*
* End of DLAED2
*
END

138
lib/linalg/dlaed3.cpp Normal file
View File

@ -0,0 +1,138 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b22 = 1.;
static doublereal c_b23 = 0.;
int dlaed3_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot,
doublereal *w, doublereal *s, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
doublereal d__1;
double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
integer i__, j, n2, n12, ii, n23, iq2;
doublereal temp;
extern doublereal dnrm2_(integer *, doublereal *, integer *);
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen),
dcopy_(integer *, doublereal *, integer *, doublereal *, integer *),
dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *);
extern doublereal dlamc3_(doublereal *, doublereal *);
extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--dlamda;
--q2;
--indx;
--ctot;
--w;
--s;
*info = 0;
if (*k < 0) {
*info = -1;
} else if (*n < *k) {
*info = -2;
} else if (*ldq < max(1, *n)) {
*info = -6;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED3", &i__1, (ftnlen)6);
return 0;
}
if (*k == 0) {
return 0;
}
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
}
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info);
if (*info != 0) {
goto L120;
}
}
if (*k == 1) {
goto L110;
}
if (*k == 2) {
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
w[1] = q[j * q_dim1 + 1];
w[2] = q[j * q_dim1 + 2];
ii = indx[1];
q[j * q_dim1 + 1] = w[ii];
ii = indx[2];
q[j * q_dim1 + 2] = w[ii];
}
goto L110;
}
dcopy_(k, &w[1], &c__1, &s[1], &c__1);
i__1 = *ldq + 1;
dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
i__2 = j - 1;
for (i__ = 1; i__ <= i__2; ++i__) {
w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
}
i__2 = *k;
for (i__ = j + 1; i__ <= i__2; ++i__) {
w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
}
}
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
d__1 = sqrt(-w[i__]);
w[i__] = d_lmp_sign(&d__1, &s[i__]);
}
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
i__2 = *k;
for (i__ = 1; i__ <= i__2; ++i__) {
s[i__] = w[i__] / q[i__ + j * q_dim1];
}
temp = dnrm2_(k, &s[1], &c__1);
i__2 = *k;
for (i__ = 1; i__ <= i__2; ++i__) {
ii = indx[i__];
q[i__ + j * q_dim1] = s[ii] / temp;
}
}
L110:
n2 = *n - *n1;
n12 = ctot[1] + ctot[2];
n23 = ctot[2] + ctot[3];
dlacpy_((char *)"A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23, (ftnlen)1);
iq2 = *n1 * n12 + 1;
if (n23 != 0) {
dgemm_((char *)"N", (char *)"N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &c_b23,
&q[*n1 + 1 + q_dim1], ldq, (ftnlen)1, (ftnlen)1);
} else {
dlaset_((char *)"A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1);
}
dlacpy_((char *)"A", &n12, k, &q[q_offset], ldq, &s[1], &n12, (ftnlen)1);
if (n12 != 0) {
dgemm_((char *)"N", (char *)"N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq,
(ftnlen)1, (ftnlen)1);
} else {
dlaset_((char *)"A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq, (ftnlen)1);
}
L120:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,350 +0,0 @@
*> \brief \b DLAED3 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED3 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
* CTOT, W, S, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, K, LDQ, N, N1
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER CTOT( * ), INDX( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* $ S( * ), W( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED3 finds the roots of the secular equation, as defined by the
*> values in D, W, and RHO, between 1 and K. It makes the
*> appropriate calls to DLAED4 and then updates the eigenvectors by
*> multiplying the matrix of eigenvectors of the pair of eigensystems
*> being combined by the matrix of eigenvectors of the K-by-K system
*> which is solved here.
*>
*> This code makes very mild assumptions about floating point
*> arithmetic. It will work on machines with a guard digit in
*> add/subtract, or on those binary machines without guard digits
*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
*> It could conceivably fail on hexadecimal or decimal machines
*> without guard digits, but we know of none.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number of terms in the rational function to be solved by
*> DLAED4. K >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of rows and columns in the Q matrix.
*> N >= K (deflation may result in N>K).
*> \endverbatim
*>
*> \param[in] N1
*> \verbatim
*> N1 is INTEGER
*> The location of the last eigenvalue in the leading submatrix.
*> min(1,N) <= N1 <= N/2.
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> D(I) contains the updated eigenvalues for
*> 1 <= I <= K.
*> \endverbatim
*>
*> \param[out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ,N)
*> Initially the first K columns are used as workspace.
*> On output the columns 1 to K contain
*> the updated eigenvectors.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The value of the parameter in the rank one update equation.
*> RHO >= 0 required.
*> \endverbatim
*>
*> \param[in,out] DLAMDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (K)
*> The first K elements of this array contain the old roots
*> of the deflated updating problem. These are the poles
*> of the secular equation. May be changed on output by
*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
*> Cray-2, or Cray C-90, as described above.
*> \endverbatim
*>
*> \param[in] Q2
*> \verbatim
*> Q2 is DOUBLE PRECISION array, dimension (LDQ2*N)
*> The first K columns of this matrix contain the non-deflated
*> eigenvectors for the split problem.
*> \endverbatim
*>
*> \param[in] INDX
*> \verbatim
*> INDX is INTEGER array, dimension (N)
*> The permutation used to arrange the columns of the deflated
*> Q matrix into three groups (see DLAED2).
*> The rows of the eigenvectors found by DLAED4 must be likewise
*> permuted before the matrix multiply can take place.
*> \endverbatim
*>
*> \param[in] CTOT
*> \verbatim
*> CTOT is INTEGER array, dimension (4)
*> A count of the total number of the various types of columns
*> in Q, as described in INDX. The fourth column type is any
*> column which has been deflated.
*> \endverbatim
*>
*> \param[in,out] W
*> \verbatim
*> W is DOUBLE PRECISION array, dimension (K)
*> The first K elements of this array contain the components
*> of the deflation-adjusted updating vector. Destroyed on
*> output.
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (N1 + 1)*K
*> Will contain the eigenvectors of the repaired matrix which
*> will be multiplied by the previously accumulated eigenvectors
*> to update the system.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: if INFO = 1, an eigenvalue did not converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
$ CTOT, W, S, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, K, LDQ, N, N1
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER CTOT( * ), INDX( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
$ S( * ), W( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 )
* ..
* .. Local Scalars ..
INTEGER I, II, IQ2, J, N12, N2, N23
DOUBLE PRECISION TEMP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMC3, DNRM2
EXTERNAL DLAMC3, DNRM2
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( K.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.K ) THEN
INFO = -2
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED3', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( K.EQ.0 )
$ RETURN
*
* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
* be computed with high relative accuracy (barring over/underflow).
* This is a problem on machines without a guard digit in
* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
* which on any of these machines zeros out the bottommost
* bit of DLAMDA(I) if it is 1; this makes the subsequent
* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
* occurs. On binary machines with a guard digit (almost all
* machines) it does not change DLAMDA(I) at all. On hexadecimal
* and decimal machines with a guard digit, it slightly
* changes the bottommost bits of DLAMDA(I). It does not account
* for hexadecimal or decimal machines without guard digits
* (we know of none). We use a subroutine call to compute
* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
* this code.
*
DO 10 I = 1, K
DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
10 CONTINUE
*
DO 20 J = 1, K
CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
*
* If the zero finder fails, the computation is terminated.
*
IF( INFO.NE.0 )
$ GO TO 120
20 CONTINUE
*
IF( K.EQ.1 )
$ GO TO 110
IF( K.EQ.2 ) THEN
DO 30 J = 1, K
W( 1 ) = Q( 1, J )
W( 2 ) = Q( 2, J )
II = INDX( 1 )
Q( 1, J ) = W( II )
II = INDX( 2 )
Q( 2, J ) = W( II )
30 CONTINUE
GO TO 110
END IF
*
* Compute updated W.
*
CALL DCOPY( K, W, 1, S, 1 )
*
* Initialize W(I) = Q(I,I)
*
CALL DCOPY( K, Q, LDQ+1, W, 1 )
DO 60 J = 1, K
DO 40 I = 1, J - 1
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
40 CONTINUE
DO 50 I = J + 1, K
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
50 CONTINUE
60 CONTINUE
DO 70 I = 1, K
W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
70 CONTINUE
*
* Compute eigenvectors of the modified rank-1 modification.
*
DO 100 J = 1, K
DO 80 I = 1, K
S( I ) = W( I ) / Q( I, J )
80 CONTINUE
TEMP = DNRM2( K, S, 1 )
DO 90 I = 1, K
II = INDX( I )
Q( I, J ) = S( II ) / TEMP
90 CONTINUE
100 CONTINUE
*
* Compute the updated eigenvectors.
*
110 CONTINUE
*
N2 = N - N1
N12 = CTOT( 1 ) + CTOT( 2 )
N23 = CTOT( 2 ) + CTOT( 3 )
*
CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
IQ2 = N1*N12 + 1
IF( N23.NE.0 ) THEN
CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
$ ZERO, Q( N1+1, 1 ), LDQ )
ELSE
CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
END IF
*
CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
IF( N12.NE.0 ) THEN
CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
$ LDQ )
ELSE
CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
END IF
*
*
120 CONTINUE
RETURN
*
* End of DLAED3
*
END

571
lib/linalg/dlaed4.cpp Normal file
View File

@ -0,0 +1,571 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta,
doublereal *rho, doublereal *dlam, integer *info)
{
integer i__1;
doublereal d__1;
double sqrt(doublereal);
doublereal a, b, c__;
integer j;
doublereal w;
integer ii;
doublereal dw, zz[3];
integer ip1;
doublereal del, eta, phi, eps, tau, psi;
integer iim1, iip1;
doublereal dphi, dpsi;
integer iter;
doublereal temp, prew, temp1, dltlb, dltub, midpt;
integer niter;
logical swtch;
extern int dlaed5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *),
dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *);
logical swtch3;
extern doublereal dlamch_(char *, ftnlen);
logical orgati;
doublereal erretm, rhoinv;
--delta;
--z__;
--d__;
*info = 0;
if (*n == 1) {
*dlam = d__[1] + *rho * z__[1] * z__[1];
delta[1] = 1.;
return 0;
}
if (*n == 2) {
dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
return 0;
}
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
rhoinv = 1. / *rho;
if (*i__ == *n) {
ii = *n - 1;
niter = 1;
midpt = *rho / 2.;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - midpt;
}
psi = 0.;
i__1 = *n - 2;
for (j = 1; j <= i__1; ++j) {
psi += z__[j] * z__[j] / delta[j];
}
c__ = rhoinv + psi;
w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*n];
if (w <= 0.) {
temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) +
z__[*n] * z__[*n] / *rho;
if (c__ <= temp) {
tau = *rho;
} else {
del = d__[*n] - d__[*n - 1];
a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
b = z__[*n] * z__[*n] * del;
if (a < 0.) {
tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
} else {
tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
}
}
dltlb = midpt;
dltub = *rho;
} else {
del = d__[*n] - d__[*n - 1];
a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
b = z__[*n] * z__[*n] * del;
if (a < 0.) {
tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
} else {
tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
}
dltlb = 0.;
dltub = midpt;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - tau;
}
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
if (abs(w) <= eps * erretm) {
*dlam = d__[*i__] + tau;
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
++niter;
c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
b = delta[*n - 1] * delta[*n] * w;
if (c__ < 0.) {
c__ = abs(c__);
}
if (c__ == 0.) {
eta = -w / (dpsi + dphi);
} else if (a >= 0.) {
eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (w * eta > 0.) {
eta = -w / (dpsi + dphi);
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
iter = niter + 1;
for (niter = iter; niter <= 30; ++niter) {
if (abs(w) <= eps * erretm) {
*dlam = d__[*i__] + tau;
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
b = delta[*n - 1] * delta[*n] * w;
if (a >= 0.) {
eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (w * eta > 0.) {
eta = -w / (dpsi + dphi);
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
}
*info = 1;
*dlam = d__[*i__] + tau;
goto L250;
} else {
niter = 1;
ip1 = *i__ + 1;
del = d__[ip1] - d__[*i__];
midpt = del / 2.;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - midpt;
}
psi = 0.;
i__1 = *i__ - 1;
for (j = 1; j <= i__1; ++j) {
psi += z__[j] * z__[j] / delta[j];
}
phi = 0.;
i__1 = *i__ + 2;
for (j = *n; j >= i__1; --j) {
phi += z__[j] * z__[j] / delta[j];
}
c__ = rhoinv + psi + phi;
w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1];
if (w > 0.) {
orgati = TRUE_;
a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
b = z__[*i__] * z__[*i__] * del;
if (a > 0.) {
tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
} else {
tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
}
dltlb = 0.;
dltub = midpt;
} else {
orgati = FALSE_;
a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
b = z__[ip1] * z__[ip1] * del;
if (a < 0.) {
tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(d__1))));
} else {
tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.);
}
dltlb = -midpt;
dltub = 0.;
}
if (orgati) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - tau;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[ip1] - tau;
}
}
if (orgati) {
ii = *i__;
} else {
ii = *i__ + 1;
}
iim1 = ii - 1;
iip1 = ii + 1;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
w = rhoinv + phi + psi;
swtch3 = FALSE_;
if (orgati) {
if (w < 0.) {
swtch3 = TRUE_;
}
} else {
if (w > 0.) {
swtch3 = TRUE_;
}
}
if (ii == 1 || ii == *n) {
swtch3 = FALSE_;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w += temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
if (abs(w) <= eps * erretm) {
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
++niter;
if (!swtch3) {
if (orgati) {
d__1 = z__[*i__] / delta[*i__];
c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
} else {
d__1 = z__[ip1] / delta[ip1];
c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
}
a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
b = delta[*i__] * delta[ip1] * w;
if (c__ == 0.) {
if (a == 0.) {
if (orgati) {
a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
} else {
a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
}
}
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
} else {
temp = rhoinv + psi + phi;
if (orgati) {
temp1 = z__[iim1] / delta[iim1];
temp1 *= temp1;
c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
zz[0] = z__[iim1] * z__[iim1];
zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
} else {
temp1 = z__[iip1] / delta[iip1];
temp1 *= temp1;
c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
zz[2] = z__[iip1] * z__[iip1];
}
zz[1] = z__[ii] * z__[ii];
dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
if (*info != 0) {
goto L250;
}
}
if (w * eta >= 0.) {
eta = -w / dw;
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
prew = w;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w = rhoinv + phi + psi + temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
(d__1 = tau + eta, abs(d__1)) * dw;
swtch = FALSE_;
if (orgati) {
if (-w > abs(prew) / 10.) {
swtch = TRUE_;
}
} else {
if (w > abs(prew) / 10.) {
swtch = TRUE_;
}
}
tau += eta;
iter = niter + 1;
for (niter = iter; niter <= 30; ++niter) {
if (abs(w) <= eps * erretm) {
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
if (!swtch3) {
if (!swtch) {
if (orgati) {
d__1 = z__[*i__] / delta[*i__];
c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
} else {
d__1 = z__[ip1] / delta[ip1];
c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
}
} else {
temp = z__[ii] / delta[ii];
if (orgati) {
dpsi += temp * temp;
} else {
dphi += temp * temp;
}
c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
}
a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
b = delta[*i__] * delta[ip1] * w;
if (c__ == 0.) {
if (a == 0.) {
if (!swtch) {
if (orgati) {
a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
} else {
a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
}
} else {
a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] * delta[ip1] * dphi;
}
}
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
} else {
temp = rhoinv + psi + phi;
if (swtch) {
c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
zz[0] = delta[iim1] * delta[iim1] * dpsi;
zz[2] = delta[iip1] * delta[iip1] * dphi;
} else {
if (orgati) {
temp1 = z__[iim1] / delta[iim1];
temp1 *= temp1;
c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
zz[0] = z__[iim1] * z__[iim1];
zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
} else {
temp1 = z__[iip1] / delta[iip1];
temp1 *= temp1;
c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
zz[2] = z__[iip1] * z__[iip1];
}
}
dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
if (*info != 0) {
goto L250;
}
}
if (w * eta >= 0.) {
eta = -w / dw;
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
prew = w;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w = rhoinv + phi + psi + temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
swtch = !swtch;
}
}
*info = 1;
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
}
L250:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,917 +0,0 @@
*> \brief \b DLAED4 used by DSTEDC. Finds a single root of the secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED4 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
*
* .. Scalar Arguments ..
* INTEGER I, INFO, N
* DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( * ), DELTA( * ), Z( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> This subroutine computes the I-th updated eigenvalue of a symmetric
*> rank-one modification to a diagonal matrix whose elements are
*> given in the array d, and that
*>
*> D(i) < D(j) for i < j
*>
*> and that RHO > 0. This is arranged by the calling routine, and is
*> no loss in generality. The rank-one modified system is thus
*>
*> diag( D ) + RHO * Z * Z_transpose.
*>
*> where we assume the Euclidean norm of Z is 1.
*>
*> The method consists of approximating the rational functions in the
*> secular equation by simpler interpolating rational functions.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The length of all arrays.
*> \endverbatim
*>
*> \param[in] I
*> \verbatim
*> I is INTEGER
*> The index of the eigenvalue to be computed. 1 <= I <= N.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> The original eigenvalues. It is assumed that they are in
*> order, D(I) < D(J) for I < J.
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (N)
*> The components of the updating vector.
*> \endverbatim
*>
*> \param[out] DELTA
*> \verbatim
*> DELTA is DOUBLE PRECISION array, dimension (N)
*> If N > 2, DELTA contains (D(j) - lambda_I) in its j-th
*> component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
*> for detail. The vector DELTA contains the information necessary
*> to construct the eigenvectors by DLAED3 and DLAED9.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The scalar in the symmetric updating formula.
*> \endverbatim
*>
*> \param[out] DLAM
*> \verbatim
*> DLAM is DOUBLE PRECISION
*> The computed lambda_I, the I-th updated eigenvalue.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> > 0: if INFO = 1, the updating process failed.
*> \endverbatim
*
*> \par Internal Parameters:
* =========================
*>
*> \verbatim
*> Logical variable ORGATI (origin-at-i?) is used for distinguishing
*> whether D(i) or D(i+1) is treated as the origin.
*>
*> ORGATI = .true. origin at i
*> ORGATI = .false. origin at i+1
*>
*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting
*> if we are working with THREE poles!
*>
*> MAXIT is the maximum number of iterations allowed for each
*> eigenvalue.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER I, INFO, N
DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( * ), DELTA( * ), Z( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER MAXIT
PARAMETER ( MAXIT = 30 )
DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0,
$ TEN = 10.0D0 )
* ..
* .. Local Scalars ..
LOGICAL ORGATI, SWTCH, SWTCH3
INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER
DOUBLE PRECISION A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
$ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
$ RHOINV, TAU, TEMP, TEMP1, W
* ..
* .. Local Arrays ..
DOUBLE PRECISION ZZ( 3 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLAED5, DLAED6
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Since this routine is called in an inner loop, we do no argument
* checking.
*
* Quick return for N=1 and 2.
*
INFO = 0
IF( N.EQ.1 ) THEN
*
* Presumably, I=1 upon entry
*
DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
DELTA( 1 ) = ONE
RETURN
END IF
IF( N.EQ.2 ) THEN
CALL DLAED5( I, D, Z, DELTA, RHO, DLAM )
RETURN
END IF
*
* Compute machine epsilon
*
EPS = DLAMCH( 'Epsilon' )
RHOINV = ONE / RHO
*
* The case I = N
*
IF( I.EQ.N ) THEN
*
* Initialize some basic variables
*
II = N - 1
NITER = 1
*
* Calculate initial guess
*
MIDPT = RHO / TWO
*
* If ||Z||_2 is not one, then TEMP should be set to
* RHO * ||Z||_2^2 / TWO
*
DO 10 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
10 CONTINUE
*
PSI = ZERO
DO 20 J = 1, N - 2
PSI = PSI + Z( J )*Z( J ) / DELTA( J )
20 CONTINUE
*
C = RHOINV + PSI
W = C + Z( II )*Z( II ) / DELTA( II ) +
$ Z( N )*Z( N ) / DELTA( N )
*
IF( W.LE.ZERO ) THEN
TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
$ Z( N )*Z( N ) / RHO
IF( C.LE.TEMP ) THEN
TAU = RHO
ELSE
DEL = D( N ) - D( N-1 )
A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
B = Z( N )*Z( N )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
ELSE
TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
END IF
END IF
*
* It can be proved that
* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
*
DLTLB = MIDPT
DLTUB = RHO
ELSE
DEL = D( N ) - D( N-1 )
A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
B = Z( N )*Z( N )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
ELSE
TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
END IF
*
* It can be proved that
* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
*
DLTLB = ZERO
DLTUB = MIDPT
END IF
*
DO 30 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - TAU
30 CONTINUE
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 40 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
40 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
DLAM = D( I ) + TAU
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
NITER = NITER + 1
C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
A = ( DELTA( N-1 )+DELTA( N ) )*W -
$ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
B = DELTA( N-1 )*DELTA( N )*W
IF( C.LT.ZERO )
$ C = ABS( C )
IF( C.EQ.ZERO ) THEN
* ETA = B/A
* ETA = RHO - TAU
* ETA = DLTUB - TAU
*
* Update proposed by Li, Ren-Cang:
ETA = -W / ( DPSI+DPHI )
ELSE IF( A.GE.ZERO ) THEN
ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GT.ZERO )
$ ETA = -W / ( DPSI+DPHI )
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
DO 50 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
50 CONTINUE
*
TAU = TAU + ETA
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 60 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
60 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
*
* Main loop to update the values of the array DELTA
*
ITER = NITER + 1
*
DO 90 NITER = ITER, MAXIT
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
DLAM = D( I ) + TAU
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
A = ( DELTA( N-1 )+DELTA( N ) )*W -
$ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
B = DELTA( N-1 )*DELTA( N )*W
IF( A.GE.ZERO ) THEN
ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GT.ZERO )
$ ETA = -W / ( DPSI+DPHI )
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
DO 70 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
70 CONTINUE
*
TAU = TAU + ETA
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 80 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
80 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
90 CONTINUE
*
* Return with INFO = 1, NITER = MAXIT and not converged
*
INFO = 1
DLAM = D( I ) + TAU
GO TO 250
*
* End for the case I = N
*
ELSE
*
* The case for I < N
*
NITER = 1
IP1 = I + 1
*
* Calculate initial guess
*
DEL = D( IP1 ) - D( I )
MIDPT = DEL / TWO
DO 100 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
100 CONTINUE
*
PSI = ZERO
DO 110 J = 1, I - 1
PSI = PSI + Z( J )*Z( J ) / DELTA( J )
110 CONTINUE
*
PHI = ZERO
DO 120 J = N, I + 2, -1
PHI = PHI + Z( J )*Z( J ) / DELTA( J )
120 CONTINUE
C = RHOINV + PSI + PHI
W = C + Z( I )*Z( I ) / DELTA( I ) +
$ Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
*
IF( W.GT.ZERO ) THEN
*
* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
*
* We choose d(i) as origin.
*
ORGATI = .TRUE.
A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
B = Z( I )*Z( I )*DEL
IF( A.GT.ZERO ) THEN
TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
ELSE
TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
END IF
DLTLB = ZERO
DLTUB = MIDPT
ELSE
*
* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
*
* We choose d(i+1) as origin.
*
ORGATI = .FALSE.
A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
B = Z( IP1 )*Z( IP1 )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
ELSE
TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
END IF
DLTLB = -MIDPT
DLTUB = ZERO
END IF
*
IF( ORGATI ) THEN
DO 130 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - TAU
130 CONTINUE
ELSE
DO 140 J = 1, N
DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
140 CONTINUE
END IF
IF( ORGATI ) THEN
II = I
ELSE
II = I + 1
END IF
IIM1 = II - 1
IIP1 = II + 1
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 150 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
150 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 160 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
160 CONTINUE
*
W = RHOINV + PHI + PSI
*
* W is the value of the secular function with
* its ii-th element removed.
*
SWTCH3 = .FALSE.
IF( ORGATI ) THEN
IF( W.LT.ZERO )
$ SWTCH3 = .TRUE.
ELSE
IF( W.GT.ZERO )
$ SWTCH3 = .TRUE.
END IF
IF( II.EQ.1 .OR. II.EQ.N )
$ SWTCH3 = .FALSE.
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = W + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU )*DW
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
NITER = NITER + 1
IF( .NOT.SWTCH3 ) THEN
IF( ORGATI ) THEN
C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
$ ( Z( I ) / DELTA( I ) )**2
ELSE
C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
$ ( Z( IP1 ) / DELTA( IP1 ) )**2
END IF
A = ( DELTA( I )+DELTA( IP1 ) )*W -
$ DELTA( I )*DELTA( IP1 )*DW
B = DELTA( I )*DELTA( IP1 )*W
IF( C.EQ.ZERO ) THEN
IF( A.EQ.ZERO ) THEN
IF( ORGATI ) THEN
A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
$ ( DPSI+DPHI )
ELSE
A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
$ ( DPSI+DPHI )
END IF
END IF
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
ELSE
*
* Interpolation using THREE most relevant poles
*
TEMP = RHOINV + PSI + PHI
IF( ORGATI ) THEN
TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
$ ( D( IIM1 )-D( IIP1 ) )*TEMP1
ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
$ ( ( DPSI-TEMP1 )+DPHI )
ELSE
TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
$ ( D( IIP1 )-D( IIM1 ) )*TEMP1
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
$ ( DPSI+( DPHI-TEMP1 ) )
ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
END IF
ZZ( 2 ) = Z( II )*Z( II )
CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 250
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GE.ZERO )
$ ETA = -W / DW
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
*
PREW = W
*
DO 180 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
180 CONTINUE
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 190 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
190 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 200 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
200 CONTINUE
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = RHOINV + PHI + PSI + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
*
SWTCH = .FALSE.
IF( ORGATI ) THEN
IF( -W.GT.ABS( PREW ) / TEN )
$ SWTCH = .TRUE.
ELSE
IF( W.GT.ABS( PREW ) / TEN )
$ SWTCH = .TRUE.
END IF
*
TAU = TAU + ETA
*
* Main loop to update the values of the array DELTA
*
ITER = NITER + 1
*
DO 240 NITER = ITER, MAXIT
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
IF( .NOT.SWTCH3 ) THEN
IF( .NOT.SWTCH ) THEN
IF( ORGATI ) THEN
C = W - DELTA( IP1 )*DW -
$ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
ELSE
C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
$ ( Z( IP1 ) / DELTA( IP1 ) )**2
END IF
ELSE
TEMP = Z( II ) / DELTA( II )
IF( ORGATI ) THEN
DPSI = DPSI + TEMP*TEMP
ELSE
DPHI = DPHI + TEMP*TEMP
END IF
C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
END IF
A = ( DELTA( I )+DELTA( IP1 ) )*W -
$ DELTA( I )*DELTA( IP1 )*DW
B = DELTA( I )*DELTA( IP1 )*W
IF( C.EQ.ZERO ) THEN
IF( A.EQ.ZERO ) THEN
IF( .NOT.SWTCH ) THEN
IF( ORGATI ) THEN
A = Z( I )*Z( I ) + DELTA( IP1 )*
$ DELTA( IP1 )*( DPSI+DPHI )
ELSE
A = Z( IP1 )*Z( IP1 ) +
$ DELTA( I )*DELTA( I )*( DPSI+DPHI )
END IF
ELSE
A = DELTA( I )*DELTA( I )*DPSI +
$ DELTA( IP1 )*DELTA( IP1 )*DPHI
END IF
END IF
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
ELSE
*
* Interpolation using THREE most relevant poles
*
TEMP = RHOINV + PSI + PHI
IF( SWTCH ) THEN
C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
ELSE
IF( ORGATI ) THEN
TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
$ ( D( IIM1 )-D( IIP1 ) )*TEMP1
ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
$ ( ( DPSI-TEMP1 )+DPHI )
ELSE
TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
$ ( D( IIP1 )-D( IIM1 ) )*TEMP1
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
$ ( DPSI+( DPHI-TEMP1 ) )
ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
END IF
END IF
CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 250
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GE.ZERO )
$ ETA = -W / DW
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
*
DO 210 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
210 CONTINUE
*
TAU = TAU + ETA
PREW = W
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 220 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
220 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 230 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
230 CONTINUE
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = RHOINV + PHI + PSI + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU )*DW
IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
$ SWTCH = .NOT.SWTCH
*
240 CONTINUE
*
* Return with INFO = 1, NITER = MAXIT and not converged
*
INFO = 1
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
*
END IF
*
250 CONTINUE
*
RETURN
*
* End of DLAED4
*
END

58
lib/linalg/dlaed5.cpp Normal file
View File

@ -0,0 +1,58 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho,
doublereal *dlam)
{
doublereal d__1;
double sqrt(doublereal);
doublereal b, c__, w, del, tau, temp;
--delta;
--z__;
--d__;
del = d__[2] - d__[1];
if (*i__ == 1) {
w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.;
if (w > 0.) {
b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[1] * z__[1] * del;
tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
*dlam = d__[1] + tau;
delta[1] = -z__[1] / tau;
delta[2] = z__[2] / (del - tau);
} else {
b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[2] * z__[2] * del;
if (b > 0.) {
tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
} else {
tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
}
*dlam = d__[2] + tau;
delta[1] = -z__[1] / (del + tau);
delta[2] = -z__[2] / tau;
}
temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
delta[1] /= temp;
delta[2] /= temp;
} else {
b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[2] * z__[2] * del;
if (b > 0.) {
tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
} else {
tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
}
*dlam = d__[2] + tau;
delta[1] = -z__[1] / (del + tau);
delta[2] = -z__[2] / tau;
temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
delta[1] /= temp;
delta[2] /= temp;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,186 +0,0 @@
*> \brief \b DLAED5 used by DSTEDC. Solves the 2-by-2 secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED5 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
*
* .. Scalar Arguments ..
* INTEGER I
* DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> This subroutine computes the I-th eigenvalue of a symmetric rank-one
*> modification of a 2-by-2 diagonal matrix
*>
*> diag( D ) + RHO * Z * transpose(Z) .
*>
*> The diagonal elements in the array D are assumed to satisfy
*>
*> D(i) < D(j) for i < j .
*>
*> We also assume RHO > 0 and that the Euclidean norm of the vector
*> Z is one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] I
*> \verbatim
*> I is INTEGER
*> The index of the eigenvalue to be computed. I = 1 or I = 2.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (2)
*> The original eigenvalues. We assume D(1) < D(2).
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (2)
*> The components of the updating vector.
*> \endverbatim
*>
*> \param[out] DELTA
*> \verbatim
*> DELTA is DOUBLE PRECISION array, dimension (2)
*> The vector DELTA contains the information necessary
*> to construct the eigenvectors.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The scalar in the symmetric updating formula.
*> \endverbatim
*>
*> \param[out] DLAM
*> \verbatim
*> DLAM is DOUBLE PRECISION
*> The computed lambda_I, the I-th updated eigenvalue.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER I
DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO, FOUR
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ FOUR = 4.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION B, C, DEL, TAU, TEMP, W
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, SQRT
* ..
* .. Executable Statements ..
*
DEL = D( 2 ) - D( 1 )
IF( I.EQ.1 ) THEN
W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
IF( W.GT.ZERO ) THEN
B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 1 )*Z( 1 )*DEL
*
* B > ZERO, always
*
TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
DLAM = D( 1 ) + TAU
DELTA( 1 ) = -Z( 1 ) / TAU
DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
ELSE
B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 2 )*Z( 2 )*DEL
IF( B.GT.ZERO ) THEN
TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
ELSE
TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
END IF
DLAM = D( 2 ) + TAU
DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
DELTA( 2 ) = -Z( 2 ) / TAU
END IF
TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
DELTA( 1 ) = DELTA( 1 ) / TEMP
DELTA( 2 ) = DELTA( 2 ) / TEMP
ELSE
*
* Now I=2
*
B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 2 )*Z( 2 )*DEL
IF( B.GT.ZERO ) THEN
TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
ELSE
TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
END IF
DLAM = D( 2 ) + TAU
DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
DELTA( 2 ) = -Z( 2 ) / TAU
TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
DELTA( 1 ) = DELTA( 1 ) / TEMP
DELTA( 2 ) = DELTA( 2 ) / TEMP
END IF
RETURN
*
* End of DLAED5
*
END

209
lib/linalg/dlaed6.cpp Normal file
View File

@ -0,0 +1,209 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed6_(integer *kniter, logical *orgati, doublereal *rho, doublereal *d__, doublereal *z__,
doublereal *finit, doublereal *tau, integer *info)
{
integer i__1;
doublereal d__1, d__2, d__3, d__4;
double sqrt(doublereal), log(doublereal), pow_lmp_di(doublereal *, integer *);
doublereal a, b, c__, f;
integer i__;
doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
integer iter;
doublereal temp, temp1, temp2, temp3, temp4;
logical scale;
integer niter;
doublereal small1, small2, sminv1, sminv2;
extern doublereal dlamch_(char *, ftnlen);
doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
--z__;
--d__;
*info = 0;
if (*orgati) {
lbd = d__[2];
ubd = d__[3];
} else {
lbd = d__[1];
ubd = d__[2];
}
if (*finit < 0.) {
lbd = 0.;
} else {
ubd = 0.;
}
niter = 1;
*tau = 0.;
if (*kniter == 2) {
if (*orgati) {
temp = (d__[3] - d__[2]) / 2.;
c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
} else {
temp = (d__[1] - d__[2]) / 2.;
c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
}
d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
temp = max(d__1, d__2);
a /= temp;
b /= temp;
c__ /= temp;
if (c__ == 0.) {
*tau = b / a;
} else if (a <= 0.) {
*tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
*tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (*tau < lbd || *tau > ubd) {
*tau = (lbd + ubd) / 2.;
}
if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
*tau = 0.;
} else {
temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) +
*tau * z__[2] / (d__[2] * (d__[2] - *tau)) +
*tau * z__[3] / (d__[3] * (d__[3] - *tau));
if (temp <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
if (abs(*finit) <= abs(temp)) {
*tau = 0.;
}
}
}
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
base = dlamch_((char *)"Base", (ftnlen)4);
i__1 = (integer)(log(dlamch_((char *)"SafMin", (ftnlen)6)) / log(base) / 3.);
small1 = pow_lmp_di(&base, &i__1);
sminv1 = 1. / small1;
small2 = small1 * small1;
sminv2 = sminv1 * sminv1;
if (*orgati) {
d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *tau, abs(d__2));
temp = min(d__3, d__4);
} else {
d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *tau, abs(d__2));
temp = min(d__3, d__4);
}
scale = FALSE_;
if (temp <= small1) {
scale = TRUE_;
if (temp <= small2) {
sclfac = sminv2;
sclinv = small2;
} else {
sclfac = sminv1;
sclinv = small1;
}
for (i__ = 1; i__ <= 3; ++i__) {
dscale[i__ - 1] = d__[i__] * sclfac;
zscale[i__ - 1] = z__[i__] * sclfac;
}
*tau *= sclfac;
lbd *= sclfac;
ubd *= sclfac;
} else {
for (i__ = 1; i__ <= 3; ++i__) {
dscale[i__ - 1] = d__[i__];
zscale[i__ - 1] = z__[i__];
}
}
fc = 0.;
df = 0.;
ddf = 0.;
for (i__ = 1; i__ <= 3; ++i__) {
temp = 1. / (dscale[i__ - 1] - *tau);
temp1 = zscale[i__ - 1] * temp;
temp2 = temp1 * temp;
temp3 = temp2 * temp;
fc += temp1 / dscale[i__ - 1];
df += temp2;
ddf += temp3;
}
f = *finit + *tau * fc;
if (abs(f) <= 0.) {
goto L60;
}
if (f <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
iter = niter + 1;
for (niter = iter; niter <= 40; ++niter) {
if (*orgati) {
temp1 = dscale[1] - *tau;
temp2 = dscale[2] - *tau;
} else {
temp1 = dscale[0] - *tau;
temp2 = dscale[1] - *tau;
}
a = (temp1 + temp2) * f - temp1 * temp2 * df;
b = temp1 * temp2 * f;
c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
temp = max(d__1, d__2);
a /= temp;
b /= temp;
c__ /= temp;
if (c__ == 0.) {
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (f * eta >= 0.) {
eta = -f / df;
}
*tau += eta;
if (*tau < lbd || *tau > ubd) {
*tau = (lbd + ubd) / 2.;
}
fc = 0.;
erretm = 0.;
df = 0.;
ddf = 0.;
for (i__ = 1; i__ <= 3; ++i__) {
if (dscale[i__ - 1] - *tau != 0.) {
temp = 1. / (dscale[i__ - 1] - *tau);
temp1 = zscale[i__ - 1] * temp;
temp2 = temp1 * temp;
temp3 = temp2 * temp;
temp4 = temp1 / dscale[i__ - 1];
fc += temp4;
erretm += abs(temp4);
df += temp2;
ddf += temp3;
} else {
goto L60;
}
}
f = *finit + *tau * fc;
erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
if (abs(f) <= eps * 4. * erretm || ubd - lbd <= eps * 4. * abs(*tau)) {
goto L60;
}
if (f <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
}
*info = 1;
L60:
if (scale) {
*tau *= sclinv;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,407 +0,0 @@
*> \brief \b DLAED6 used by DSTEDC. Computes one Newton step in solution of the secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED6 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
*
* .. Scalar Arguments ..
* LOGICAL ORGATI
* INTEGER INFO, KNITER
* DOUBLE PRECISION FINIT, RHO, TAU
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( 3 ), Z( 3 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED6 computes the positive or negative root (closest to the origin)
*> of
*> z(1) z(2) z(3)
*> f(x) = rho + --------- + ---------- + ---------
*> d(1)-x d(2)-x d(3)-x
*>
*> It is assumed that
*>
*> if ORGATI = .true. the root is between d(2) and d(3);
*> otherwise it is between d(1) and d(2)
*>
*> This routine will be called by DLAED4 when necessary. In most cases,
*> the root sought is the smallest in magnitude, though it might not be
*> in some extremely rare situations.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] KNITER
*> \verbatim
*> KNITER is INTEGER
*> Refer to DLAED4 for its significance.
*> \endverbatim
*>
*> \param[in] ORGATI
*> \verbatim
*> ORGATI is LOGICAL
*> If ORGATI is true, the needed root is between d(2) and
*> d(3); otherwise it is between d(1) and d(2). See
*> DLAED4 for further details.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> Refer to the equation f(x) above.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (3)
*> D satisfies d(1) < d(2) < d(3).
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (3)
*> Each of the elements in z must be positive.
*> \endverbatim
*>
*> \param[in] FINIT
*> \verbatim
*> FINIT is DOUBLE PRECISION
*> The value of f at 0. It is more accurate than the one
*> evaluated inside this routine (if someone wants to do
*> so).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION
*> The root of the equation f(x).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> > 0: if INFO = 1, failure to converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> 10/02/03: This version has a few statements commented out for thread
*> safety (machine parameters are computed on each entry). SJH.
*>
*> 05/10/06: Modified from a new version of Ren-Cang Li, use
*> Gragg-Thornton-Warner cubic convergent scheme for better stability.
*> \endverbatim
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
LOGICAL ORGATI
INTEGER INFO, KNITER
DOUBLE PRECISION FINIT, RHO, TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( 3 ), Z( 3 )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER MAXIT
PARAMETER ( MAXIT = 40 )
DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Local Arrays ..
DOUBLE PRECISION DSCALE( 3 ), ZSCALE( 3 )
* ..
* .. Local Scalars ..
LOGICAL SCALE
INTEGER I, ITER, NITER
DOUBLE PRECISION A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
$ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
$ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4,
$ LBD, UBD
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
INFO = 0
*
IF( ORGATI ) THEN
LBD = D(2)
UBD = D(3)
ELSE
LBD = D(1)
UBD = D(2)
END IF
IF( FINIT .LT. ZERO )THEN
LBD = ZERO
ELSE
UBD = ZERO
END IF
*
NITER = 1
TAU = ZERO
IF( KNITER.EQ.2 ) THEN
IF( ORGATI ) THEN
TEMP = ( D( 3 )-D( 2 ) ) / TWO
C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
ELSE
TEMP = ( D( 1 )-D( 2 ) ) / TWO
C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
END IF
TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
A = A / TEMP
B = B / TEMP
C = C / TEMP
IF( C.EQ.ZERO ) THEN
TAU = B / A
ELSE IF( A.LE.ZERO ) THEN
TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
IF( TAU .LT. LBD .OR. TAU .GT. UBD )
$ TAU = ( LBD+UBD )/TWO
IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN
TAU = ZERO
ELSE
TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) +
$ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) +
$ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) )
IF( TEMP .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
IF( ABS( FINIT ).LE.ABS( TEMP ) )
$ TAU = ZERO
END IF
END IF
*
* get machine parameters for possible scaling to avoid overflow
*
* modified by Sven: parameters SMALL1, SMINV1, SMALL2,
* SMINV2, EPS are not SAVEd anymore between one call to the
* others but recomputed at each call
*
EPS = DLAMCH( 'Epsilon' )
BASE = DLAMCH( 'Base' )
SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) /
$ THREE ) )
SMINV1 = ONE / SMALL1
SMALL2 = SMALL1*SMALL1
SMINV2 = SMINV1*SMINV1
*
* Determine if scaling of inputs necessary to avoid overflow
* when computing 1/TEMP**3
*
IF( ORGATI ) THEN
TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
ELSE
TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
END IF
SCALE = .FALSE.
IF( TEMP.LE.SMALL1 ) THEN
SCALE = .TRUE.
IF( TEMP.LE.SMALL2 ) THEN
*
* Scale up by power of radix nearest 1/SAFMIN**(2/3)
*
SCLFAC = SMINV2
SCLINV = SMALL2
ELSE
*
* Scale up by power of radix nearest 1/SAFMIN**(1/3)
*
SCLFAC = SMINV1
SCLINV = SMALL1
END IF
*
* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
*
DO 10 I = 1, 3
DSCALE( I ) = D( I )*SCLFAC
ZSCALE( I ) = Z( I )*SCLFAC
10 CONTINUE
TAU = TAU*SCLFAC
LBD = LBD*SCLFAC
UBD = UBD*SCLFAC
ELSE
*
* Copy D and Z to DSCALE and ZSCALE
*
DO 20 I = 1, 3
DSCALE( I ) = D( I )
ZSCALE( I ) = Z( I )
20 CONTINUE
END IF
*
FC = ZERO
DF = ZERO
DDF = ZERO
DO 30 I = 1, 3
TEMP = ONE / ( DSCALE( I )-TAU )
TEMP1 = ZSCALE( I )*TEMP
TEMP2 = TEMP1*TEMP
TEMP3 = TEMP2*TEMP
FC = FC + TEMP1 / DSCALE( I )
DF = DF + TEMP2
DDF = DDF + TEMP3
30 CONTINUE
F = FINIT + TAU*FC
*
IF( ABS( F ).LE.ZERO )
$ GO TO 60
IF( F .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
*
* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent
* scheme
*
* It is not hard to see that
*
* 1) Iterations will go up monotonically
* if FINIT < 0;
*
* 2) Iterations will go down monotonically
* if FINIT > 0.
*
ITER = NITER + 1
*
DO 50 NITER = ITER, MAXIT
*
IF( ORGATI ) THEN
TEMP1 = DSCALE( 2 ) - TAU
TEMP2 = DSCALE( 3 ) - TAU
ELSE
TEMP1 = DSCALE( 1 ) - TAU
TEMP2 = DSCALE( 2 ) - TAU
END IF
A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
B = TEMP1*TEMP2*F
C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
A = A / TEMP
B = B / TEMP
C = C / TEMP
IF( C.EQ.ZERO ) THEN
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
IF( F*ETA.GE.ZERO ) THEN
ETA = -F / DF
END IF
*
TAU = TAU + ETA
IF( TAU .LT. LBD .OR. TAU .GT. UBD )
$ TAU = ( LBD + UBD )/TWO
*
FC = ZERO
ERRETM = ZERO
DF = ZERO
DDF = ZERO
DO 40 I = 1, 3
IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN
TEMP = ONE / ( DSCALE( I )-TAU )
TEMP1 = ZSCALE( I )*TEMP
TEMP2 = TEMP1*TEMP
TEMP3 = TEMP2*TEMP
TEMP4 = TEMP1 / DSCALE( I )
FC = FC + TEMP4
ERRETM = ERRETM + ABS( TEMP4 )
DF = DF + TEMP2
DDF = DDF + TEMP3
ELSE
GO TO 60
END IF
40 CONTINUE
F = FINIT + TAU*FC
ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
$ ABS( TAU )*DF
IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR.
$ ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) ) )
$ GO TO 60
IF( F .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
50 CONTINUE
INFO = 1
60 CONTINUE
*
* Undo scaling
*
IF( SCALE )
$ TAU = TAU*SCLINV
RETURN
*
* End of DLAED6
*
END

131
lib/linalg/dlaed7.cpp Normal file
View File

@ -0,0 +1,131 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__2 = 2;
static integer c__1 = 1;
static doublereal c_b10 = 1.;
static doublereal c_b11 = 0.;
static integer c_n1 = -1;
int dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl,
integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr,
integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work,
integer *iwork, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
integer pow_lmp_ii(integer *, integer *);
integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer indxc, indxp;
extern int dlaed8_(integer *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, integer *, doublereal *, integer *, integer *, integer *,
doublereal *, integer *, integer *, integer *),
dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *),
dlaeda_(integer *, integer *, integer *, integer *, integer *, integer *, integer *,
integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *,
integer *);
integer idlmda;
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen);
integer coltyp;
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--qstore;
--qptr;
--prmptr;
--perm;
--givptr;
givcol -= 3;
givnum -= 3;
--work;
--iwork;
*info = 0;
if (*icompq < 0 || *icompq > 1) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*icompq == 1 && *qsiz < *n) {
*info = -3;
} else if (*ldq < max(1, *n)) {
*info = -9;
} else if (min(1, *n) > *cutpnt || *n < *cutpnt) {
*info = -12;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED7", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
if (*icompq == 1) {
ldq2 = *qsiz;
} else {
ldq2 = *n;
}
iz = 1;
idlmda = iz + *n;
iw = idlmda + *n;
iq2 = iw + *n;
is = iq2 + *n * ldq2;
indx = 1;
indxc = indx + *n;
coltyp = indxc + *n;
indxp = coltyp + *n;
ptr = pow_lmp_ii(&c__2, tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_lmp_ii(&c__2, &i__2);
}
curr = ptr + *curpbm;
dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &givcol[3], &givnum[3],
&qstore[1], &qptr[1], &work[iz], &work[iz + *n], info);
if (*curlvl == *tlvls) {
qptr[curr] = 1;
prmptr[curr] = 1;
givptr[curr] = 1;
}
dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, cutpnt, &work[iz],
&work[idlmda], &work[iq2], &ldq2, &work[iw], &perm[prmptr[curr]], &givptr[curr + 1],
&givcol[(givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp],
&iwork[indx], info);
prmptr[curr + 1] = prmptr[curr] + *n;
givptr[curr + 1] += givptr[curr];
if (k != 0) {
dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], &work[iw],
&qstore[qptr[curr]], &k, info);
if (*info != 0) {
goto L30;
}
if (*icompq == 1) {
dgemm_((char *)"N", (char *)"N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[qptr[curr]], &k,
&c_b11, &q[q_offset], ldq, (ftnlen)1, (ftnlen)1);
}
i__1 = k;
qptr[curr + 1] = qptr[curr] + i__1 * i__1;
n1 = k;
n2 = *n - k;
dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
} else {
qptr[curr + 1] = qptr[curr];
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indxq[i__] = i__;
}
}
L30:
return 0;
}
#ifdef __cplusplus
}
#endif

Some files were not shown because too many files have changed in this diff Show More