Merge pull request #3579 from akohlmey/linalg-in-cpp

Convert linalg library from Fortran to C++
This commit is contained in:
Axel Kohlmeyer
2023-01-05 21:16:06 -05:00
committed by GitHub
431 changed files with 31249 additions and 63712 deletions

View File

@ -440,16 +440,12 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_LATTE OR
find_package(BLAS)
endif()
if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG)
include(CheckGeneratorSupport)
if(NOT CMAKE_GENERATOR_SUPPORT_FORTRAN)
status(FATAL_ERROR "Cannot build internal linear algebra library as CMake build tool lacks Fortran support")
endif()
enable_language(Fortran)
file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF])
add_library(linalg STATIC ${LAPACK_SOURCES})
file(GLOB LINALG_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp)
add_library(linalg STATIC ${LINALG_SOURCES})
set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE})
set(BLAS_LIBRARIES "$<TARGET_FILE:linalg>")
set(LAPACK_LIBRARIES "$<TARGET_FILE:linalg>")
target_link_libraries(lammps PRIVATE linalg)
else()
list(APPEND LAPACK_LIBRARIES ${BLAS_LIBRARIES})
endif()

View File

@ -72,7 +72,7 @@
"configurationType": "Debug",
"buildRoot": "${workspaceRoot}\\build\\${name}",
"installRoot": "${workspaceRoot}\\install\\${name}",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x64" ],
@ -105,7 +105,7 @@
"configurationType": "Release",
"buildRoot": "${workspaceRoot}\\build\\${name}",
"installRoot": "${workspaceRoot}\\install\\${name}",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe",
"cmakeCommandArgs": "-C ${workspaceRoot}\\cmake\\presets\\windows.cmake -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe -DBUILD_MPI=off",
"buildCommandArgs": "",
"ctestCommandArgs": "-V",
"inheritEnvironments": [ "clang_cl_x64" ],

View File

@ -1,6 +1,7 @@
set(WIN_PACKAGES
AMOEBA
ASPHERE
AWPMD
BOCS
BODY
BPM
@ -20,6 +21,7 @@ set(WIN_PACKAGES
DPD-SMOOTH
DRUDE
EFF
ELECTRODE
EXTRA-COMPUTE
EXTRA-DUMP
EXTRA-FIX
@ -35,6 +37,7 @@ set(WIN_PACKAGES
MEAM
MISC
ML-IAP
ML-POD
ML-SNAP
MOFFF
MOLECULE

View File

@ -1209,9 +1209,9 @@ The ATC package requires the MANYBODY package also be installed.
.. code-block:: bash
$ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler
$ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m g++" # build with GNU C++ compiler
----------
@ -1260,9 +1260,9 @@ AWPMD package
.. code-block:: bash
$ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler
$ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m g++" # build with GNU C++ compiler
----------
@ -1364,9 +1364,9 @@ This package depends on the KSPACE package.
.. code-block:: bash
$ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler
$ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m g++" # build with GNU C++ compiler
The package itself is activated with ``make yes-KSPACE`` and
``make yes-ELECTRODE``
@ -1448,9 +1448,9 @@ ML-POD package
.. code-block:: bash
$ make lib-linalg # print help message
$ make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi")
$ make lib-linalg args="-m gfortran" # build with GNU Fortran compiler
$ make lib-linalg args="-m serial" # build with GNU C++ compiler (settings as with "make serial")
$ make lib-linalg args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi")
$ make lib-linalg args="-m g++" # build with GNU C++ compiler
The package itself is activated with ``make yes-ML-POD``.

View File

@ -1,6 +1,6 @@
# Settings that the LAMMPS build will import when this package library is used
atc_SYSINC =
atc_SYSLIB = -llinalg -lgfortran
atc_SYSLIB = -llinalg
atc_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -1,5 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
awpmd_SYSINC =
awpmd_SYSLIB = -llinalg -lgfortran
awpmd_SYSLIB = -llinalg
awpmd_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -1,19 +0,0 @@
# ifndef ERF_H
# define ERF_H
# ifdef _WIN32
# ifdef __cplusplus
extern "C" {
# endif
double erf(double x);
double erfc(double x);
# ifdef __cplusplus
}
# endif
# endif
# endif

View File

@ -1,53 +1,36 @@
// Interface for LAPACK function
# ifndef LAPACK_INTER_H
# define LAPACK_INTER_H
#ifndef LAPACK_INTER_H
#define LAPACK_INTER_H
#include <complex>
typedef int lapack_int;
typedef complex<float> lapack_complex_float;
typedef complex<double> lapack_complex_double;
#if defined(_WIN32) && !defined(__MINGW32__)
#define DGETRF dgetrf_
#define DGETRS dgetrs_
#define DGETRI dgetri_
#define ZPPTRF zpptrf_
#define ZPPTRI zpptri_
//#define MKL_Complex8 lapack_complex_float
//#define MKL_Complex16 lapack_complex_double
#include "mkl.h"
inline void ZPPTRF( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ) {
ZPPTRF(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
}
inline void ZPPTRI( char* uplo, const lapack_int* n, lapack_complex_double* ap, lapack_int* info ){
ZPPTRI(uplo, (int*)n, (MKL_Complex16*)ap, (int*)info);
}
#else
#define DGETRF dgetrf_
#define DGETRS dgetrs_
#define DGETRI dgetri_
#define ZPPTRF zpptrf_
#define ZPPTRI zpptri_
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
void dgetrf_( const lapack_int* m, const lapack_int* n, double* a, const lapack_int* lda,
lapack_int* ipiv, lapack_int* info );
void dgetrs_( const char* trans, const lapack_int* n, const lapack_int* nrhs,
const double* a, const lapack_int* lda, const lapack_int* ipiv,
double* b, const lapack_int* ldb, lapack_int* info );
void dgetri_( const lapack_int* n, double* a, const lapack_int* lda,
const lapack_int* ipiv, double* work, const lapack_int* lwork,
lapack_int* info );
void zpptrf_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
lapack_int* info );
void zpptri_( const char* uplo, const lapack_int* n, lapack_complex_double* ap,
lapack_int* info );
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
void dgetrf_(const lapack_int *m, const lapack_int *n, double *a,
const lapack_int *lda, lapack_int *ipiv, lapack_int *info);
void dgetrs_(const char *trans, const lapack_int *n, const lapack_int *nrhs,
const double *a, const lapack_int *lda, const lapack_int *ipiv,
double *b, const lapack_int *ldb, lapack_int *info);
void dgetri_(const lapack_int *n, double *a, const lapack_int *lda,
const lapack_int *ipiv, double *work, const lapack_int *lwork,
lapack_int *info);
void zpptrf_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
lapack_int *info);
void zpptri_(const char *uplo, const lapack_int *n, lapack_complex_double *ap,
lapack_int *info);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* lapack_intER_H */

View File

@ -149,10 +149,8 @@
# include "pairhash.h"
# include "TCP/tcpdefs.h"
# include "wavepacket.h"
# include "erf.h"
# include "cerf.h"
using namespace std;
# include "lapack_inter.h"

View File

@ -1,6 +1,4 @@
# include "wpmd_split.h"
//# include "erf.h"
void AWPMD_split::resize(int flag){
for(int s=0;s<2;s++){

View File

@ -14,7 +14,7 @@
#include <algorithm>
// used to set the absolute path of a replica file
#if defined(WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <direct.h>
#define CHDIR ::_chdir
#define GETCWD ::_getcwd

View File

@ -8,10 +8,10 @@
// Colvars repository at GitHub.
// Using access() to check if a file exists (until we can assume C++14/17)
#if !defined(WIN32) || defined(__CYGWIN__)
#if !defined(_WIN32) || defined(__CYGWIN__)
#include <unistd.h>
#endif
#if defined(WIN32)
#if defined(_WIN32)
#include <io.h>
#endif
@ -678,7 +678,7 @@ int colvarproxy_io::backup_file(char const *filename)
// Simplified version of NAMD_file_exists()
int exit_code;
do {
#if defined(WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__CYGWIN__)
// We could use _access_s here, but it is probably too new
exit_code = _access(filename, 00);
#else
@ -708,7 +708,7 @@ int colvarproxy_io::backup_file(char const *filename)
int colvarproxy_io::remove_file(char const *filename)
{
int error_code = COLVARS_OK;
#if defined(WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__CYGWIN__)
// Because the file may be open by other processes, rename it to filename.old
std::string const renamed_file(std::string(filename)+".old");
// It may still be there from an interrupted run, so remove it to be safe
@ -741,7 +741,7 @@ int colvarproxy_io::remove_file(char const *filename)
int colvarproxy_io::rename_file(char const *filename, char const *newfilename)
{
int error_code = COLVARS_OK;
#if defined(WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__CYGWIN__)
// On straight Windows, must remove the destination before renaming it
error_code |= remove_file(newfilename);
#endif

View File

@ -1,5 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
electrode_SYSINC =
electrode_SYSLIB = -llinalg -lgfortran
electrode_SYSLIB = -llinalg
electrode_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -6,21 +6,17 @@ SHELL = /bin/sh
# ------ FILES ------
SRC = $(wildcard *.f)
SRC1 = $(wildcard *.F)
FILES = $(SRC) $(SRC1) Makefile.* README
SRC = $(wildcard *.cpp)
# ------ DEFINITIONS ------
LIB = liblinalg.a
OBJ = $(SRC:.f=.o) $(SRC1:.F=.o)
OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------
FC = gfortran
FFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore
FFLAGS0 = -O0 -fPIC -fno-second-underscore
CXX = g++ -std=c++11
CCFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing
ARCHIVE = ar
AR = ar
ARCHFLAG = -rcs
@ -34,20 +30,11 @@ lib: $(OBJ)
# ------ COMPILE RULES ------
%.o:%.F
$(FC) $(FFLAGS) -c $<
%.o:%.f
$(FC) $(FFLAGS) -c $<
dlamch.o: dlamch.f
$(FC) $(FFLAGS0) -c $<
%.o:%.cpp
$(CC) $(CCFLAGS) -c $<
# ------ CLEAN ------
clean:
-rm -f *.o *.mod *~ $(LIB)
tar:
-tar -czvf ../linalg.tar.gz $(FILES)
-rm -f *.o *~ $(LIB)

View File

@ -6,21 +6,17 @@ SHELL = /bin/sh
# ------ FILES ------
SRC = $(wildcard *.f)
SRC1 = $(wildcard *.F)
FILES = $(SRC) $(SRC1) Makefile.* README
SRC = $(wildcard *.cpp)
# ------ DEFINITIONS ------
LIB = liblinalg.a
OBJ = $(SRC:.f=.o) $(SRC1:.F=.o)
OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------
FC = mpifort
FFLAGS = -O3 -fPIC
FFLAGS0 = -O0 -fPIC
CC = mpicxx
CCFLAGS = -O3 -fPIC
ARCHIVE = ar
AR = ar
ARCHFLAG = -rcs
@ -34,20 +30,11 @@ lib: $(OBJ)
# ------ COMPILE RULES ------
%.o:%.F
$(FC) $(FFLAGS) -c $<
%.o:%.f
$(FC) $(FFLAGS) -c $<
dlamch.o: dlamch.f
$(FC) $(FFLAGS0) -c $<
%.o:%.cpp
$(CC) $(CCFLAGS) -c $<
# ------ CLEAN ------
clean:
-rm -f *.o *.mod *~ $(LIB)
tar:
-tar -czvf ../linalg.tar.gz $(FILES)
-rm -f *.o *~ $(LIB)

View File

@ -1 +1 @@
Makefile.gfortran
Makefile.g++

View File

@ -1,7 +1,13 @@
This directory has generic BLAS and LAPACK source files needed by the
ATC, AWPMD, ELECTRODE, LATTE, and ML-POD packages (and possibly by other
packages) in the future that can be used instead of platform or vendor
optimized BLAS/LAPACK library.
optimized BLAS/LAPACK library. To simplify installation, these files
have been translated from the Fortran versions of the BLAS and LAPACK
references source files at https://netlib.org/lapack/ to C++ with f2c.
The package with the tools to do the translation and the matching
original Fortran sources are at https://github.com/lammps/linalg.
Please note that even through the files are C++ source code the
resulting library will follow the Fortran binary conventions.
Note that this is an *incomplete* subset of full BLAS/LAPACK.
@ -20,7 +26,7 @@ can do it manually by following the instructions below.
Build the library using one of the provided Makefile.* files or create
your own, specific to your compiler and system. For example:
make -f Makefile.gfortran
make -f Makefile.g++
When you are done building this library, one file should exist in this
directory:

13
lib/linalg/d_lmp_cnjg.cpp Normal file
View File

@ -0,0 +1,13 @@
#include "lmp_f2c.h"
extern "C" {
void d_lmp_cnjg(doublecomplex *r, doublecomplex *z)
{
doublereal zi = z->i;
r->r = z->r;
r->i = -zi;
}
}

10
lib/linalg/d_lmp_imag.cpp Normal file
View File

@ -0,0 +1,10 @@
#include "lmp_f2c.h"
extern "C" {
double d_lmp_imag(doublecomplex *z)
{
return (z->i);
}
}

14
lib/linalg/d_lmp_lg10.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "lmp_f2c.h"
#undef abs
static constexpr double log10e = 0.43429448190325182765;
#include <cmath>
extern "C" {
double d_lmp_lg10(doublereal *x)
{
return (log10e * log(*x));
}
}

12
lib/linalg/d_lmp_sign.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "lmp_f2c.h"
extern "C" {
double d_lmp_sign(doublereal *a, doublereal *b)
{
double x;
x = (*a >= 0 ? *a : -*a);
return (*b >= 0 ? x : -x);
}
}

50
lib/linalg/dasum.cpp Normal file
View File

@ -0,0 +1,50 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal dasum_(integer *n, doublereal *dx, integer *incx)
{
integer i__1, i__2;
doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
integer i__, m, mp1;
doublereal dtemp;
integer nincx;
--dx;
ret_val = 0.;
dtemp = 0.;
if (*n <= 0 || *incx <= 0) {
return ret_val;
}
if (*incx == 1) {
m = *n % 6;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += (d__1 = dx[i__], abs(d__1));
}
if (*n < 6) {
ret_val = dtemp;
return ret_val;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 6) {
dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], abs(d__2)) +
(d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + 3], abs(d__4)) +
(d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = dx[i__ + 5], abs(d__6));
}
} else {
nincx = *n * *incx;
i__1 = nincx;
i__2 = *incx;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
dtemp += (d__1 = dx[i__], abs(d__1));
}
}
ret_val = dtemp;
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,131 +0,0 @@
*> \brief \b DASUM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
*
* .. Scalar Arguments ..
* INTEGER INCX,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DASUM takes the sum of the absolute values.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 3/93 to return if incx .le. 0.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
DOUBLE PRECISION DTEMP
INTEGER I,M,MP1,NINCX
* ..
* .. Intrinsic Functions ..
INTRINSIC DABS,MOD
* ..
DASUM = 0.0d0
DTEMP = 0.0d0
IF (N.LE.0 .OR. INCX.LE.0) RETURN
IF (INCX.EQ.1) THEN
* code for increment equal to 1
*
*
* clean-up loop
*
M = MOD(N,6)
IF (M.NE.0) THEN
DO I = 1,M
DTEMP = DTEMP + DABS(DX(I))
END DO
IF (N.LT.6) THEN
DASUM = DTEMP
RETURN
END IF
END IF
MP1 = M + 1
DO I = MP1,N,6
DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
$ DABS(DX(I+2)) + DABS(DX(I+3)) +
$ DABS(DX(I+4)) + DABS(DX(I+5))
END DO
ELSE
*
* code for increment not equal to 1
*
NINCX = N*INCX
DO I = 1,NINCX,INCX
DTEMP = DTEMP + DABS(DX(I))
END DO
END IF
DASUM = DTEMP
RETURN
*
* End of DASUM
*
END

56
lib/linalg/daxpy.cpp Normal file
View File

@ -0,0 +1,56 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
integer i__, m, ix, iy, mp1;
--dy;
--dx;
if (*n <= 0) {
return 0;
}
if (*da == 0.) {
return 0;
}
if (*incx == 1 && *incy == 1) {
m = *n % 4;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[i__] += *da * dx[i__];
}
}
if (*n < 4) {
return 0;
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 4) {
dy[i__] += *da * dx[i__];
dy[i__ + 1] += *da * dx[i__ + 1];
dy[i__ + 2] += *da * dx[i__ + 2];
dy[i__ + 3] += *da * dx[i__ + 3];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[iy] += *da * dx[ix];
ix += *incx;
iy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,152 +0,0 @@
*> \brief \b DAXPY
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION DA
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DAXPY constant times a vector plus a vector.
*> uses unrolled loops for increments equal to one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DA
*> \verbatim
*> DA is DOUBLE PRECISION
*> On entry, DA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[in,out] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION DA
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
IF (N.LE.0) RETURN
IF (DA.EQ.0.0d0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,4)
IF (M.NE.0) THEN
DO I = 1,M
DY(I) = DY(I) + DA*DX(I)
END DO
END IF
IF (N.LT.4) RETURN
MP1 = M + 1
DO I = MP1,N,4
DY(I) = DY(I) + DA*DX(I)
DY(I+1) = DY(I+1) + DA*DX(I+1)
DY(I+2) = DY(I+2) + DA*DX(I+2)
DY(I+3) = DY(I+3) + DA*DX(I+3)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DY(IY) = DY(IY) + DA*DX(IX)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
RETURN
*
* End of DAXPY
*
END

522
lib/linalg/dbdsqr.cpp Normal file
View File

@ -0,0 +1,522 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b15 = -.125;
static integer c__1 = 1;
static doublereal c_b49 = 1.;
static doublereal c_b72 = -1.;
int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *nru, integer *ncc, doublereal *d__,
doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu,
doublereal *c__, integer *ldc, doublereal *work, integer *info, ftnlen uplo_len)
{
integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
doublereal d__1, d__2, d__3, d__4;
double pow_lmp_dd(doublereal *, doublereal *), sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
integer iterdivn;
doublereal f, g, h__;
integer i__, j, m;
doublereal r__;
integer maxitdivn;
doublereal cs;
integer ll;
doublereal sn, mu;
integer nm1, nm12, nm13, lll;
doublereal eps, sll, tol, abse;
integer idir;
doublereal abss;
integer oldm;
doublereal cosl;
integer isub, iter;
doublereal unfl, sinl, cosr, smin, smax, sinr;
extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
doublereal *),
dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
dscal_(integer *, doublereal *, doublereal *, integer *);
extern logical lsame_(char *, char *, ftnlen, ftnlen);
doublereal oldcs;
extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen, ftnlen);
integer oldll;
doublereal shift, sigmn, oldsn;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
doublereal sminl, sigmx;
logical lower;
extern int dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *),
dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *);
extern doublereal dlamch_(char *, ftnlen);
extern int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *),
xerbla_(char *, integer *, ftnlen);
doublereal sminoa, thresh;
logical rotate;
doublereal tolmul;
--d__;
--e;
vt_dim1 = *ldvt;
vt_offset = 1 + vt_dim1;
vt -= vt_offset;
u_dim1 = *ldu;
u_offset = 1 + u_dim1;
u -= u_offset;
c_dim1 = *ldc;
c_offset = 1 + c_dim1;
c__ -= c_offset;
--work;
*info = 0;
lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1);
if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lower) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*ncvt < 0) {
*info = -3;
} else if (*nru < 0) {
*info = -4;
} else if (*ncc < 0) {
*info = -5;
} else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1, *n)) {
*info = -9;
} else if (*ldu < max(1, *nru)) {
*info = -11;
} else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1, *n)) {
*info = -13;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DBDSQR", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
if (*n == 1) {
goto L160;
}
rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
if (!rotate) {
dlasq1_(n, &d__[1], &e[1], &work[1], info);
if (*info != 2) {
return 0;
}
*info = 0;
}
nm1 = *n - 1;
nm12 = nm1 + nm1;
nm13 = nm12 + nm1;
idir = 0;
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
unfl = dlamch_((char *)"Safe minimum", (ftnlen)12);
if (lower) {
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
d__[i__] = r__;
e[i__] = sn * d__[i__ + 1];
d__[i__ + 1] = cs * d__[i__ + 1];
work[i__] = cs;
work[nm1 + i__] = sn;
}
if (*nru > 0) {
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1,
(ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc, (ftnlen)1,
(ftnlen)1, (ftnlen)1);
}
}
d__3 = 100., d__4 = pow_lmp_dd(&eps, &c_b15);
d__1 = 10., d__2 = min(d__3, d__4);
tolmul = max(d__1, d__2);
tol = tolmul * eps;
smax = 0.;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
smax = max(d__2, d__3);
}
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
smax = max(d__2, d__3);
}
sminl = 0.;
if (tol >= 0.) {
sminoa = abs(d__[1]);
if (sminoa == 0.) {
goto L50;
}
mu = sminoa;
i__1 = *n;
for (i__ = 2; i__ <= i__1; ++i__) {
mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1], abs(d__1))));
sminoa = min(sminoa, mu);
if (sminoa == 0.) {
goto L50;
}
}
L50:
sminoa /= sqrt((doublereal)(*n));
d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6;
thresh = max(d__1, d__2);
} else {
d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6;
thresh = max(d__1, d__2);
}
maxitdivn = *n * 6;
iterdivn = 0;
iter = -1;
oldll = -1;
oldm = -1;
m = *n;
L60:
if (m <= 1) {
goto L160;
}
if (iter >= *n) {
iter -= *n;
++iterdivn;
if (iterdivn >= maxitdivn) {
goto L200;
}
}
if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
d__[m] = 0.;
}
smax = (d__1 = d__[m], abs(d__1));
smin = smax;
i__1 = m - 1;
for (lll = 1; lll <= i__1; ++lll) {
ll = m - lll;
abss = (d__1 = d__[ll], abs(d__1));
abse = (d__1 = e[ll], abs(d__1));
if (tol < 0. && abss <= thresh) {
d__[ll] = 0.;
}
if (abse <= thresh) {
goto L80;
}
smin = min(smin, abss);
d__1 = max(smax, abss);
smax = max(d__1, abse);
}
ll = 0;
goto L90;
L80:
e[ll] = 0.;
if (ll == m - 1) {
--m;
goto L60;
}
L90:
++ll;
if (ll == m - 1) {
dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl);
d__[m - 1] = sigmx;
e[m - 1] = 0.;
d__[m] = sigmn;
if (*ncvt > 0) {
drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &cosr, &sinr);
}
if (*nru > 0) {
drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &c__1, &cosl, &sinl);
}
if (*ncc > 0) {
drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &cosl, &sinl);
}
m += -2;
goto L60;
}
if (ll > oldm || m < oldll) {
if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
idir = 1;
} else {
idir = 2;
}
}
if (idir == 1) {
if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(d__1)) ||
tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) {
e[m - 1] = 0.;
goto L60;
}
if (tol >= 0.) {
mu = (d__1 = d__[ll], abs(d__1));
sminl = mu;
i__1 = m - 1;
for (lll = ll; lll <= i__1; ++lll) {
if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
e[lll] = 0.;
goto L60;
}
mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
sminl = min(sminl, mu);
}
}
} else {
if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)) ||
tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
e[ll] = 0.;
goto L60;
}
if (tol >= 0.) {
mu = (d__1 = d__[m], abs(d__1));
sminl = mu;
i__1 = ll;
for (lll = m - 1; lll >= i__1; --lll) {
if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
e[lll] = 0.;
goto L60;
}
mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll], abs(d__1))));
sminl = min(sminl, mu);
}
}
}
oldll = ll;
oldm = m;
d__1 = eps, d__2 = tol * .01;
if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1, d__2)) {
shift = 0.;
} else {
if (idir == 1) {
sll = (d__1 = d__[ll], abs(d__1));
dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
} else {
sll = (d__1 = d__[m], abs(d__1));
dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
}
if (sll > 0.) {
d__1 = shift / sll;
if (d__1 * d__1 < eps) {
shift = 0.;
}
}
}
iter = iter + m - ll;
if (shift == 0.) {
if (idir == 1) {
cs = 1.;
oldcs = 1.;
i__1 = m - 1;
for (i__ = ll; i__ <= i__1; ++i__) {
d__1 = d__[i__] * cs;
dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
if (i__ > ll) {
e[i__ - 1] = oldsn * r__;
}
d__1 = oldcs * r__;
d__2 = d__[i__ + 1] * sn;
dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
work[i__ - ll + 1] = cs;
work[i__ - ll + 1 + nm1] = sn;
work[i__ - ll + 1 + nm12] = oldcs;
work[i__ - ll + 1 + nm13] = oldsn;
}
h__ = d__[m] * cs;
d__[m] = h__ * oldcs;
e[m - 1] = h__ * oldsn;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
&u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
&c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
e[m - 1] = 0.;
}
} else {
cs = 1.;
oldcs = 1.;
i__1 = ll + 1;
for (i__ = m; i__ >= i__1; --i__) {
d__1 = d__[i__] * cs;
dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
if (i__ < m) {
e[i__] = oldsn * r__;
}
d__1 = oldcs * r__;
d__2 = d__[i__ - 1] * sn;
dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
work[i__ - ll] = cs;
work[i__ - ll + nm1] = -sn;
work[i__ - ll + nm12] = oldcs;
work[i__ - ll + nm13] = -oldsn;
}
h__ = d__[ll] * cs;
d__[ll] = h__ * oldcs;
e[ll] = h__ * oldsn;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
&vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[ll], abs(d__1)) <= thresh) {
e[ll] = 0.;
}
}
} else {
if (idir == 1) {
f = ((d__1 = d__[ll], abs(d__1)) - shift) *
(d_lmp_sign(&c_b49, &d__[ll]) + shift / d__[ll]);
g = e[ll];
i__1 = m - 1;
for (i__ = ll; i__ <= i__1; ++i__) {
dlartg_(&f, &g, &cosr, &sinr, &r__);
if (i__ > ll) {
e[i__ - 1] = r__;
}
f = cosr * d__[i__] + sinr * e[i__];
e[i__] = cosr * e[i__] - sinr * d__[i__];
g = sinr * d__[i__ + 1];
d__[i__ + 1] = cosr * d__[i__ + 1];
dlartg_(&f, &g, &cosl, &sinl, &r__);
d__[i__] = r__;
f = cosl * e[i__] + sinl * d__[i__ + 1];
d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
if (i__ < m - 1) {
g = sinl * e[i__ + 1];
e[i__ + 1] = cosl * e[i__ + 1];
}
work[i__ - ll + 1] = cosr;
work[i__ - ll + 1 + nm1] = sinr;
work[i__ - ll + 1 + nm12] = cosl;
work[i__ - ll + 1 + nm13] = sinl;
}
e[m - 1] = f;
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ll + vt_dim1], ldvt,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1],
&u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1],
&c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
e[m - 1] = 0.;
}
} else {
f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_lmp_sign(&c_b49, &d__[m]) + shift / d__[m]);
g = e[m - 1];
i__1 = ll + 1;
for (i__ = m; i__ >= i__1; --i__) {
dlartg_(&f, &g, &cosr, &sinr, &r__);
if (i__ < m) {
e[i__] = r__;
}
f = cosr * d__[i__] + sinr * e[i__ - 1];
e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
g = sinr * d__[i__ - 1];
d__[i__ - 1] = cosr * d__[i__ - 1];
dlartg_(&f, &g, &cosl, &sinl, &r__);
d__[i__] = r__;
f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
if (i__ > ll + 1) {
g = sinl * e[i__ - 2];
e[i__ - 2] = cosl * e[i__ - 2];
}
work[i__ - ll] = cosr;
work[i__ - ll + nm1] = -sinr;
work[i__ - ll + nm12] = cosl;
work[i__ - ll + nm13] = -sinl;
}
e[ll] = f;
if ((d__1 = e[ll], abs(d__1)) <= thresh) {
e[ll] = 0.;
}
if (*ncvt > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[nm13 + 1],
&vt[ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*nru > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
if (*ncc > 0) {
i__1 = m - ll + 1;
dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ll + c_dim1], ldc,
(ftnlen)1, (ftnlen)1, (ftnlen)1);
}
}
}
goto L60;
L160:
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (d__[i__] < 0.) {
d__[i__] = -d__[i__];
if (*ncvt > 0) {
dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
}
}
}
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
isub = 1;
smin = d__[1];
i__2 = *n + 1 - i__;
for (j = 2; j <= i__2; ++j) {
if (d__[j] <= smin) {
isub = j;
smin = d__[j];
}
}
if (isub != *n + 1 - i__) {
d__[isub] = d__[*n + 1 - i__];
d__[*n + 1 - i__] = smin;
if (*ncvt > 0) {
dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt);
}
if (*nru > 0) {
dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1);
}
if (*ncc > 0) {
dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc);
}
}
}
goto L220;
L200:
*info = 0;
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
if (e[i__] != 0.) {
++(*info);
}
}
L220:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,864 +0,0 @@
*> \brief \b DBDSQR
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DBDSQR + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
* LDU, C, LDC, WORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
* ..
* .. Array Arguments ..
* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ),
* $ VT( LDVT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DBDSQR computes the singular values and, optionally, the right and/or
*> left singular vectors from the singular value decomposition (SVD) of
*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
*> zero-shift QR algorithm. The SVD of B has the form
*>
*> B = Q * S * P**T
*>
*> where S is the diagonal matrix of singular values, Q is an orthogonal
*> matrix of left singular vectors, and P is an orthogonal matrix of
*> right singular vectors. If left singular vectors are requested, this
*> subroutine actually returns U*Q instead of Q, and, if right singular
*> vectors are requested, this subroutine returns P**T*VT instead of
*> P**T, for given real input matrices U and VT. When U and VT are the
*> orthogonal matrices that reduce a general matrix A to bidiagonal
*> form: A = U*B*VT, as computed by DGEBRD, then
*>
*> A = (U*Q) * S * (P**T*VT)
*>
*> is the SVD of A. Optionally, the subroutine may also compute Q**T*C
*> for a given real input matrix C.
*>
*> See "Computing Small Singular Values of Bidiagonal Matrices With
*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
*> no. 5, pp. 873-912, Sept 1990) and
*> "Accurate singular values and differential qd algorithms," by
*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
*> Department, University of California at Berkeley, July 1992
*> for a detailed description of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> = 'U': B is upper bidiagonal;
*> = 'L': B is lower bidiagonal.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix B. N >= 0.
*> \endverbatim
*>
*> \param[in] NCVT
*> \verbatim
*> NCVT is INTEGER
*> The number of columns of the matrix VT. NCVT >= 0.
*> \endverbatim
*>
*> \param[in] NRU
*> \verbatim
*> NRU is INTEGER
*> The number of rows of the matrix U. NRU >= 0.
*> \endverbatim
*>
*> \param[in] NCC
*> \verbatim
*> NCC is INTEGER
*> The number of columns of the matrix C. NCC >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the n diagonal elements of the bidiagonal matrix B.
*> On exit, if INFO=0, the singular values of B in decreasing
*> order.
*> \endverbatim
*>
*> \param[in,out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (N-1)
*> On entry, the N-1 offdiagonal elements of the bidiagonal
*> matrix B.
*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
*> will contain the diagonal and superdiagonal elements of a
*> bidiagonal matrix orthogonally equivalent to the one given
*> as input.
*> \endverbatim
*>
*> \param[in,out] VT
*> \verbatim
*> VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
*> On entry, an N-by-NCVT matrix VT.
*> On exit, VT is overwritten by P**T * VT.
*> Not referenced if NCVT = 0.
*> \endverbatim
*>
*> \param[in] LDVT
*> \verbatim
*> LDVT is INTEGER
*> The leading dimension of the array VT.
*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
*> \endverbatim
*>
*> \param[in,out] U
*> \verbatim
*> U is DOUBLE PRECISION array, dimension (LDU, N)
*> On entry, an NRU-by-N matrix U.
*> On exit, U is overwritten by U * Q.
*> Not referenced if NRU = 0.
*> \endverbatim
*>
*> \param[in] LDU
*> \verbatim
*> LDU is INTEGER
*> The leading dimension of the array U. LDU >= max(1,NRU).
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is DOUBLE PRECISION array, dimension (LDC, NCC)
*> On entry, an N-by-NCC matrix C.
*> On exit, C is overwritten by Q**T * C.
*> Not referenced if NCC = 0.
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> The leading dimension of the array C.
*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*(N-1))
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: If INFO = -i, the i-th argument had an illegal value
*> > 0:
*> if NCVT = NRU = NCC = 0,
*> = 1, a split was marked by a positive value in E
*> = 2, current block of Z not diagonalized after 30*N
*> iterations (in inner while loop)
*> = 3, termination criterion of outer while loop not met
*> (program created more than N unreduced blocks)
*> else NCVT = NRU = NCC = 0,
*> the algorithm did not converge; D and E contain the
*> elements of a bidiagonal matrix which is orthogonally
*> similar to the input matrix B; if INFO = i, i
*> elements of E have not converged to zero.
*> \endverbatim
*
*> \par Internal Parameters:
* =========================
*>
*> \verbatim
*> TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
*> TOLMUL controls the convergence criterion of the QR loop.
*> If it is positive, TOLMUL*EPS is the desired relative
*> precision in the computed singular values.
*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the
*> desired absolute accuracy in the computed singular
*> values (corresponds to relative accuracy
*> abs(TOLMUL*EPS) in the largest singular value.
*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably
*> between 10 (for fast convergence) and .1/EPS
*> (for there to be some accuracy in the results).
*> Default is to lose at either one eighth or 2 of the
*> available decimal digits in each computed singular value
*> (whichever is smaller).
*>
*> MAXITR INTEGER, default = 6
*> MAXITR controls the maximum number of passes of the
*> algorithm through its inner loop. The algorithms stops
*> (and so fails to converge) if the number of passes
*> through the inner loop exceeds MAXITR*N**2.
*>
*> \endverbatim
*
*> \par Note:
* ===========
*>
*> \verbatim
*> Bug report from Cezary Dendek.
*> On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is
*> removed since it can overflow pretty easily (for N larger or equal
*> than 18,919). We instead use MAXITDIVN = MAXITR*N.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
* =====================================================================
SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
$ LDU, C, LDC, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
* ..
* .. Array Arguments ..
DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ),
$ VT( LDVT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION NEGONE
PARAMETER ( NEGONE = -1.0D0 )
DOUBLE PRECISION HNDRTH
PARAMETER ( HNDRTH = 0.01D0 )
DOUBLE PRECISION TEN
PARAMETER ( TEN = 10.0D0 )
DOUBLE PRECISION HNDRD
PARAMETER ( HNDRD = 100.0D0 )
DOUBLE PRECISION MEIGTH
PARAMETER ( MEIGTH = -0.125D0 )
INTEGER MAXITR
PARAMETER ( MAXITR = 6 )
* ..
* .. Local Scalars ..
LOGICAL LOWER, ROTATE
INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M,
$ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM
DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
$ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
$ SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
$ SN, THRESH, TOL, TOLMUL, UNFL
* ..
* .. External Functions ..
LOGICAL LSAME
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
$ DSCAL, DSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
LOWER = LSAME( UPLO, 'L' )
IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NCVT.LT.0 ) THEN
INFO = -3
ELSE IF( NRU.LT.0 ) THEN
INFO = -4
ELSE IF( NCC.LT.0 ) THEN
INFO = -5
ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
$ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
INFO = -9
ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
INFO = -11
ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
$ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
INFO = -13
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DBDSQR', -INFO )
RETURN
END IF
IF( N.EQ.0 )
$ RETURN
IF( N.EQ.1 )
$ GO TO 160
*
* ROTATE is true if any singular vectors desired, false otherwise
*
ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
*
* If no singular vectors desired, use qd algorithm
*
IF( .NOT.ROTATE ) THEN
CALL DLASQ1( N, D, E, WORK, INFO )
*
* If INFO equals 2, dqds didn't finish, try to finish
*
IF( INFO .NE. 2 ) RETURN
INFO = 0
END IF
*
NM1 = N - 1
NM12 = NM1 + NM1
NM13 = NM12 + NM1
IDIR = 0
*
* Get machine constants
*
EPS = DLAMCH( 'Epsilon' )
UNFL = DLAMCH( 'Safe minimum' )
*
* If matrix lower bidiagonal, rotate to be upper bidiagonal
* by applying Givens rotations on the left
*
IF( LOWER ) THEN
DO 10 I = 1, N - 1
CALL DLARTG( D( I ), E( I ), CS, SN, R )
D( I ) = R
E( I ) = SN*D( I+1 )
D( I+1 ) = CS*D( I+1 )
WORK( I ) = CS
WORK( NM1+I ) = SN
10 CONTINUE
*
* Update singular vectors if desired
*
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
$ LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
$ LDC )
END IF
*
* Compute singular values to relative accuracy TOL
* (By setting TOL to be negative, algorithm will compute
* singular values to absolute accuracy ABS(TOL)*norm(input matrix))
*
TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
TOL = TOLMUL*EPS
*
* Compute approximate maximum, minimum singular values
*
SMAX = ZERO
DO 20 I = 1, N
SMAX = MAX( SMAX, ABS( D( I ) ) )
20 CONTINUE
DO 30 I = 1, N - 1
SMAX = MAX( SMAX, ABS( E( I ) ) )
30 CONTINUE
SMINL = ZERO
IF( TOL.GE.ZERO ) THEN
*
* Relative accuracy desired
*
SMINOA = ABS( D( 1 ) )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
MU = SMINOA
DO 40 I = 2, N
MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
SMINOA = MIN( SMINOA, MU )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
40 CONTINUE
50 CONTINUE
SMINOA = SMINOA / SQRT( DBLE( N ) )
THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) )
ELSE
*
* Absolute accuracy desired
*
THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) )
END IF
*
* Prepare for main iteration loop for the singular values
* (MAXIT is the maximum number of passes through the inner
* loop permitted before nonconvergence signalled.)
*
MAXITDIVN = MAXITR*N
ITERDIVN = 0
ITER = -1
OLDLL = -1
OLDM = -1
*
* M points to last element of unconverged part of matrix
*
M = N
*
* Begin main iteration loop
*
60 CONTINUE
*
* Check for convergence or exceeding iteration count
*
IF( M.LE.1 )
$ GO TO 160
*
IF( ITER.GE.N ) THEN
ITER = ITER - N
ITERDIVN = ITERDIVN + 1
IF( ITERDIVN.GE.MAXITDIVN )
$ GO TO 200
END IF
*
* Find diagonal block of matrix to work on
*
IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
$ D( M ) = ZERO
SMAX = ABS( D( M ) )
SMIN = SMAX
DO 70 LLL = 1, M - 1
LL = M - LLL
ABSS = ABS( D( LL ) )
ABSE = ABS( E( LL ) )
IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
$ D( LL ) = ZERO
IF( ABSE.LE.THRESH )
$ GO TO 80
SMIN = MIN( SMIN, ABSS )
SMAX = MAX( SMAX, ABSS, ABSE )
70 CONTINUE
LL = 0
GO TO 90
80 CONTINUE
E( LL ) = ZERO
*
* Matrix splits since E(LL) = 0
*
IF( LL.EQ.M-1 ) THEN
*
* Convergence of bottom singular value, return to top of loop
*
M = M - 1
GO TO 60
END IF
90 CONTINUE
LL = LL + 1
*
* E(LL) through E(M-1) are nonzero, E(LL-1) is zero
*
IF( LL.EQ.M-1 ) THEN
*
* 2 by 2 block, handle separately
*
CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
$ COSR, SINL, COSL )
D( M-1 ) = SIGMX
E( M-1 ) = ZERO
D( M ) = SIGMN
*
* Compute singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
$ SINR )
IF( NRU.GT.0 )
$ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
IF( NCC.GT.0 )
$ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
$ SINL )
M = M - 2
GO TO 60
END IF
*
* If working on new submatrix, choose shift direction
* (from larger end diagonal element towards smaller)
*
IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
*
* Chase bulge from top (big end) to bottom (small end)
*
IDIR = 1
ELSE
*
* Chase bulge from bottom (big end) to top (small end)
*
IDIR = 2
END IF
END IF
*
* Apply convergence tests
*
IF( IDIR.EQ.1 ) THEN
*
* Run convergence test in forward direction
* First apply standard test to bottom of matrix
*
IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
E( M-1 ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion forward
*
MU = ABS( D( LL ) )
SMINL = MU
DO 100 LLL = LL, M - 1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
100 CONTINUE
END IF
*
ELSE
*
* Run convergence test in backward direction
* First apply standard test to top of matrix
*
IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
E( LL ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion backward
*
MU = ABS( D( M ) )
SMINL = MU
DO 110 LLL = M - 1, LL, -1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
110 CONTINUE
END IF
END IF
OLDLL = LL
OLDM = M
*
* Compute shift. First, test if shifting would ruin relative
* accuracy, and if so set the shift to zero.
*
IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
$ MAX( EPS, HNDRTH*TOL ) ) THEN
*
* Use a zero shift to avoid loss of relative accuracy
*
SHIFT = ZERO
ELSE
*
* Compute the shift from 2-by-2 block at end of matrix
*
IF( IDIR.EQ.1 ) THEN
SLL = ABS( D( LL ) )
CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
ELSE
SLL = ABS( D( M ) )
CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
END IF
*
* Test if shift negligible, and if so set to zero
*
IF( SLL.GT.ZERO ) THEN
IF( ( SHIFT / SLL )**2.LT.EPS )
$ SHIFT = ZERO
END IF
END IF
*
* Increment iteration count
*
ITER = ITER + M - LL
*
* If SHIFT = 0, do simplified QR iteration
*
IF( SHIFT.EQ.ZERO ) THEN
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 120 I = LL, M - 1
CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
IF( I.GT.LL )
$ E( I-1 ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL+1 ) = CS
WORK( I-LL+1+NM1 ) = SN
WORK( I-LL+1+NM12 ) = OLDCS
WORK( I-LL+1+NM13 ) = OLDSN
120 CONTINUE
H = D( M )*CS
D( M ) = H*OLDCS
E( M-1 ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 130 I = M, LL + 1, -1
CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
IF( I.LT.M )
$ E( I ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL ) = CS
WORK( I-LL+NM1 ) = -SN
WORK( I-LL+NM12 ) = OLDCS
WORK( I-LL+NM13 ) = -OLDSN
130 CONTINUE
H = D( LL )*CS
D( LL ) = H*OLDCS
E( LL ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
END IF
ELSE
*
* Use nonzero shift
*
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( LL ) )-SHIFT )*
$ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
G = E( LL )
DO 140 I = LL, M - 1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.GT.LL )
$ E( I-1 ) = R
F = COSR*D( I ) + SINR*E( I )
E( I ) = COSR*E( I ) - SINR*D( I )
G = SINR*D( I+1 )
D( I+1 ) = COSR*D( I+1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I ) + SINL*D( I+1 )
D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
IF( I.LT.M-1 ) THEN
G = SINL*E( I+1 )
E( I+1 ) = COSL*E( I+1 )
END IF
WORK( I-LL+1 ) = COSR
WORK( I-LL+1+NM1 ) = SINR
WORK( I-LL+1+NM12 ) = COSL
WORK( I-LL+1+NM13 ) = SINL
140 CONTINUE
E( M-1 ) = F
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
$ D( M ) )
G = E( M-1 )
DO 150 I = M, LL + 1, -1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.LT.M )
$ E( I ) = R
F = COSR*D( I ) + SINR*E( I-1 )
E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
G = SINR*D( I-1 )
D( I-1 ) = COSR*D( I-1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I-1 ) + SINL*D( I-1 )
D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
IF( I.GT.LL+1 ) THEN
G = SINL*E( I-2 )
E( I-2 ) = COSL*E( I-2 )
END IF
WORK( I-LL ) = COSR
WORK( I-LL+NM1 ) = -SINR
WORK( I-LL+NM12 ) = COSL
WORK( I-LL+NM13 ) = -SINL
150 CONTINUE
E( LL ) = F
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
*
* Update singular vectors if desired
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
END IF
END IF
*
* QR iteration finished, go back and check convergence
*
GO TO 60
*
* All singular values converged, so make them positive
*
160 CONTINUE
DO 170 I = 1, N
IF( D( I ).LT.ZERO ) THEN
D( I ) = -D( I )
*
* Change sign of singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
END IF
170 CONTINUE
*
* Sort the singular values into decreasing order (insertion sort on
* singular values, but only one transposition per singular vector)
*
DO 190 I = 1, N - 1
*
* Scan for smallest D(I)
*
ISUB = 1
SMIN = D( 1 )
DO 180 J = 2, N + 1 - I
IF( D( J ).LE.SMIN ) THEN
ISUB = J
SMIN = D( J )
END IF
180 CONTINUE
IF( ISUB.NE.N+1-I ) THEN
*
* Swap singular values and vectors
*
D( ISUB ) = D( N+1-I )
D( N+1-I ) = SMIN
IF( NCVT.GT.0 )
$ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
$ LDVT )
IF( NRU.GT.0 )
$ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
IF( NCC.GT.0 )
$ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
END IF
190 CONTINUE
GO TO 220
*
* Maximum number of iterations exceeded, failure to converge
*
200 CONTINUE
INFO = 0
DO 210 I = 1, N - 1
IF( E( I ).NE.ZERO )
$ INFO = INFO + 1
210 CONTINUE
220 CONTINUE
RETURN
*
* End of DBDSQR
*
END

14
lib/linalg/dcabs1.cpp Normal file
View File

@ -0,0 +1,14 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal dcabs1_(doublecomplex *z__)
{
doublereal ret_val, d__1, d__2;
double d_lmp_imag(doublecomplex *);
ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_lmp_imag(z__), abs(d__2));
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,66 +0,0 @@
*> \brief \b DCABS1
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DCABS1(Z)
*
* .. Scalar Arguments ..
* COMPLEX*16 Z
* ..
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] Z
*> \verbatim
*> Z is COMPLEX*16
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
* =====================================================================
DOUBLE PRECISION FUNCTION DCABS1(Z)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
COMPLEX*16 Z
* ..
* ..
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC ABS,DBLE,DIMAG
*
DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
RETURN
*
* End of DCABS1
*
END

56
lib/linalg/dcopy.cpp Normal file
View File

@ -0,0 +1,56 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
integer i__, m, ix, iy, mp1;
--dy;
--dx;
if (*n <= 0) {
return 0;
}
if (*incx == 1 && *incy == 1) {
m = *n % 7;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[i__] = dx[i__];
}
if (*n < 7) {
return 0;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 7) {
dy[i__] = dx[i__];
dy[i__ + 1] = dx[i__ + 1];
dy[i__ + 2] = dx[i__ + 2];
dy[i__ + 3] = dx[i__ + 3];
dy[i__ + 4] = dx[i__ + 4];
dy[i__ + 5] = dx[i__ + 5];
dy[i__ + 6] = dx[i__ + 6];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dy[iy] = dx[ix];
ix += *incx;
iy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,146 +0,0 @@
*> \brief \b DCOPY
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DCOPY copies a vector, x, to a vector, y.
*> uses unrolled loops for increments equal to 1.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[out] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
IF (N.LE.0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,7)
IF (M.NE.0) THEN
DO I = 1,M
DY(I) = DX(I)
END DO
IF (N.LT.7) RETURN
END IF
MP1 = M + 1
DO I = MP1,N,7
DY(I) = DX(I)
DY(I+1) = DX(I+1)
DY(I+2) = DX(I+2)
DY(I+3) = DX(I+3)
DY(I+4) = DX(I+4)
DY(I+5) = DX(I+5)
DY(I+6) = DX(I+6)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DY(IY) = DX(IX)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
RETURN
*
* End of DCOPY
*
END

58
lib/linalg/ddot.cpp Normal file
View File

@ -0,0 +1,58 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy)
{
integer i__1;
doublereal ret_val;
integer i__, m, ix, iy, mp1;
doublereal dtemp;
--dy;
--dx;
ret_val = 0.;
dtemp = 0.;
if (*n <= 0) {
return ret_val;
}
if (*incx == 1 && *incy == 1) {
m = *n % 5;
if (m != 0) {
i__1 = m;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += dx[i__] * dy[i__];
}
if (*n < 5) {
ret_val = dtemp;
return ret_val;
}
}
mp1 = m + 1;
i__1 = *n;
for (i__ = mp1; i__ <= i__1; i__ += 5) {
dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] +
dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] +
dx[i__ + 4] * dy[i__ + 4];
}
} else {
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dtemp += dx[ix] * dy[iy];
ix += *incx;
iy += *incy;
}
}
ret_val = dtemp;
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,148 +0,0 @@
*> \brief \b DDOT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
*
* .. Scalar Arguments ..
* INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION DX(*),DY(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DDOT forms the dot product of two vectors.
*> uses unrolled loops for increments equal to one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> number of elements in input vector(s)
*> \endverbatim
*>
*> \param[in] DX
*> \verbatim
*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of DX
*> \endverbatim
*>
*> \param[in] DY
*> \verbatim
*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> storage spacing between elements of DY
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level1
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> jack dongarra, linpack, 3/11/78.
*> modified 12/3/93, array(1) declarations changed to array(*)
*> \endverbatim
*>
* =====================================================================
DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
*
* -- Reference BLAS level1 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION DX(*),DY(*)
* ..
*
* =====================================================================
*
* .. Local Scalars ..
DOUBLE PRECISION DTEMP
INTEGER I,IX,IY,M,MP1
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD
* ..
DDOT = 0.0d0
DTEMP = 0.0d0
IF (N.LE.0) RETURN
IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
*
* code for both increments equal to 1
*
*
* clean-up loop
*
M = MOD(N,5)
IF (M.NE.0) THEN
DO I = 1,M
DTEMP = DTEMP + DX(I)*DY(I)
END DO
IF (N.LT.5) THEN
DDOT=DTEMP
RETURN
END IF
END IF
MP1 = M + 1
DO I = MP1,N,5
DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
$ DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
END DO
ELSE
*
* code for unequal increments or equal increments
* not equal to 1
*
IX = 1
IY = 1
IF (INCX.LT.0) IX = (-N+1)*INCX + 1
IF (INCY.LT.0) IY = (-N+1)*INCY + 1
DO I = 1,N
DTEMP = DTEMP + DX(IX)*DY(IY)
IX = IX + INCX
IY = IY + INCY
END DO
END IF
DDOT = DTEMP
RETURN
*
* End of DDOT
*
END

105
lib/linalg/dgebd2.cpp Normal file
View File

@ -0,0 +1,105 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgebd2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
doublereal *tauq, doublereal *taup, doublereal *work, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info < 0) {
i__1 = -(*info);
xerbla_((char *)"DGEBD2", &i__1, (ftnlen)6);
return 0;
}
if (*m >= *n) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
if (i__ < *n) {
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[i__],
&a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
}
a[i__ + i__ * a_dim1] = d__[i__];
if (i__ < *n) {
i__2 = *n - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
e[i__] = a[i__ + (i__ + 1) * a_dim1];
a[i__ + (i__ + 1) * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__],
&a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)5);
a[i__ + (i__ + 1) * a_dim1] = e[i__];
} else {
taup[i__] = 0.;
}
}
} else {
i__1 = *m;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
if (i__ < *m) {
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[i__],
&a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
}
a[i__ + i__ * a_dim1] = d__[i__];
if (i__ < *m) {
i__2 = *m - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
e[i__] = a[i__ + 1 + i__ * a_dim1];
a[i__ + 1 + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tauq[i__],
&a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
a[i__ + 1 + i__ * a_dim1] = e[i__];
} else {
tauq[i__] = 0.;
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,317 +0,0 @@
*> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEBD2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEBD2 reduces a real general m by n matrix A to upper or lower
*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*>
*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n general matrix to be reduced.
*> On exit,
*> if m >= n, the diagonal and the first superdiagonal are
*> overwritten with the upper bidiagonal matrix B; the
*> elements below the diagonal, with the array TAUQ, represent
*> the orthogonal matrix Q as a product of elementary
*> reflectors, and the elements above the first superdiagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors;
*> if m < n, the diagonal and the first subdiagonal are
*> overwritten with the lower bidiagonal matrix B; the
*> elements below the first subdiagonal, with the array TAUQ,
*> represent the orthogonal matrix Q as a product of
*> elementary reflectors, and the elements above the diagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (min(M,N))
*> The diagonal elements of the bidiagonal matrix B:
*> D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (min(M,N)-1)
*> The off-diagonal elements of the bidiagonal matrix B:
*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (max(M,N))
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> If m >= n,
*>
*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n,
*>
*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The contents of A on exit are illustrated by the following examples:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
*> ( v1 v2 v3 v4 v5 )
*>
*> where d and e denote diagonal and off-diagonal elements of B, vi
*> denotes an element of the vector defining H(i), and ui an element of
*> the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBD2', -INFO )
RETURN
END IF
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, N
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
IF( I.LT.N )
$ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.N ) THEN
*
* Generate elementary reflector G(i) to annihilate
* A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Apply G(i) to A(i+1:m,i+1:n) from the right
*
CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
$ TAUP( I ), A( I+1, I+1 ), LDA, WORK )
A( I, I+1 ) = E( I )
ELSE
TAUP( I ) = ZERO
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, M
*
* Generate elementary reflector G(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply G(i) to A(i+1:m,i:n) from the right
*
IF( I.LT.M )
$ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
$ TAUP( I ), A( I+1, I ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.M ) THEN
*
* Generate elementary reflector H(i) to annihilate
* A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Apply H(i) to A(i+1:m,i+1:n) from the left
*
CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
$ A( I+1, I+1 ), LDA, WORK )
A( I+1, I ) = E( I )
ELSE
TAUQ( I ) = ZERO
END IF
20 CONTINUE
END IF
RETURN
*
* End of DGEBD2
*
END

129
lib/linalg/dgebrd.cpp Normal file
View File

@ -0,0 +1,129 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
static doublereal c_b21 = -1.;
static doublereal c_b22 = 1.;
int dgebrd_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *d__, doublereal *e,
doublereal *tauq, doublereal *taup, doublereal *work, integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, j, nb, nx, ws;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer nbmin, iinfo, minmn;
extern int dgebd2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *),
dlabrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *,
integer *),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwrkx, ldwrky, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
--work;
*info = 0;
i__1 = 1, i__2 = ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nb = max(i__1, i__2);
lwkopt = (*m + *n) * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else {
i__1 = max(1, *m);
if (*lwork < max(i__1, *n) && !lquery) {
*info = -10;
}
}
if (*info < 0) {
i__1 = -(*info);
xerbla_((char *)"DGEBRD", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
minmn = min(*m, *n);
if (minmn == 0) {
work[1] = 1.;
return 0;
}
ws = max(*m, *n);
ldwrkx = *m;
ldwrky = *n;
if (nb > 1 && nb < minmn) {
i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < minmn) {
ws = (*m + *n) * nb;
if (*lwork < ws) {
nbmin = ilaenv_(&c__2, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
if (*lwork >= (*m + *n) * nbmin) {
nb = *lwork / (*m + *n);
} else {
nb = 1;
nx = minmn;
}
}
}
} else {
nx = minmn;
}
i__1 = minmn - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *m - i__ + 1;
i__4 = *n - i__ + 1;
dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__],
&taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky);
i__3 = *m - i__ - nb + 1;
i__4 = *n - i__ - nb + 1;
dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + nb + i__ * a_dim1],
lda, &work[ldwrkx * nb + nb + 1], &ldwrky, &c_b22,
&a[i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)12, (ftnlen)9);
i__3 = *m - i__ - nb + 1;
i__4 = *n - i__ - nb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &nb, &c_b21, &work[nb + 1], &ldwrkx,
&a[i__ + (i__ + nb) * a_dim1], lda, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda,
(ftnlen)12, (ftnlen)12);
if (*m >= *n) {
i__3 = i__ + nb - 1;
for (j = i__; j <= i__3; ++j) {
a[j + j * a_dim1] = d__[j];
a[j + (j + 1) * a_dim1] = e[j];
}
} else {
i__3 = i__ + nb - 1;
for (j = i__; j <= i__3; ++j) {
a[j + j * a_dim1] = d__[j];
a[j + 1 + j * a_dim1] = e[j];
}
}
}
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tauq[i__], &taup[i__],
&work[1], &iinfo);
work[1] = (doublereal)ws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,349 +0,0 @@
*> \brief \b DGEBRD
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEBRD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
* INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEBRD reduces a general real M-by-N matrix A to upper or lower
*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*>
*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N general matrix to be reduced.
*> On exit,
*> if m >= n, the diagonal and the first superdiagonal are
*> overwritten with the upper bidiagonal matrix B; the
*> elements below the diagonal, with the array TAUQ, represent
*> the orthogonal matrix Q as a product of elementary
*> reflectors, and the elements above the first superdiagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors;
*> if m < n, the diagonal and the first subdiagonal are
*> overwritten with the lower bidiagonal matrix B; the
*> elements below the first subdiagonal, with the array TAUQ,
*> represent the orthogonal matrix Q as a product of
*> elementary reflectors, and the elements above the diagonal,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (min(M,N))
*> The diagonal elements of the bidiagonal matrix B:
*> D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (min(M,N)-1)
*> The off-diagonal elements of the bidiagonal matrix B:
*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,M,N).
*> For optimum performance LWORK >= (M+N)*NB, where NB
*> is the optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> If m >= n,
*>
*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n,
*>
*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors;
*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
*> tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The contents of A on exit are illustrated by the following examples:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
*> ( v1 v2 v3 v4 v5 )
*>
*> where d and e denote diagonal and off-diagonal elements of B, vi
*> denotes an element of the vector defining H(i), and ui an element of
*> the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
$ NBMIN, NX, WS
* ..
* .. External Subroutines ..
EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
LWKOPT = ( M+N )*NB
WORK( 1 ) = DBLE( LWKOPT )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBRD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
MINMN = MIN( M, N )
IF( MINMN.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
WS = MAX( M, N )
LDWRKX = M
LDWRKY = N
*
IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
*
* Set the crossover point NX.
*
NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
*
* Determine when to switch from blocked to unblocked code.
*
IF( NX.LT.MINMN ) THEN
WS = ( M+N )*NB
IF( LWORK.LT.WS ) THEN
*
* Not enough work space for the optimal NB, consider using
* a smaller block size.
*
NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
IF( LWORK.GE.( M+N )*NBMIN ) THEN
NB = LWORK / ( M+N )
ELSE
NB = 1
NX = MINMN
END IF
END IF
END IF
ELSE
NX = MINMN
END IF
*
DO 30 I = 1, MINMN - NX, NB
*
* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
* the matrices X and Y which are needed to update the unreduced
* part of the matrix
*
CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, LDWRKX,
$ WORK( LDWRKX*NB+1 ), LDWRKY )
*
* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
* of the form A := A - V*Y**T - X*U**T
*
CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, A( I+NB, I ), LDA,
$ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
$ A( I+NB, I+NB ), LDA )
CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
$ ONE, A( I+NB, I+NB ), LDA )
*
* Copy diagonal and off-diagonal elements of B back into A
*
IF( M.GE.N ) THEN
DO 10 J = I, I + NB - 1
A( J, J ) = D( J )
A( J, J+1 ) = E( J )
10 CONTINUE
ELSE
DO 20 J = I, I + NB - 1
A( J, J ) = D( J )
A( J+1, J ) = E( J )
20 CONTINUE
END IF
30 CONTINUE
*
* Use unblocked code to reduce the remainder of the matrix
*
CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, IINFO )
WORK( 1 ) = WS
RETURN
*
* End of DGEBRD
*
END

101
lib/linalg/dgecon.cpp Normal file
View File

@ -0,0 +1,101 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgecon_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *anorm,
doublereal *rcond, doublereal *work, integer *iwork, integer *info, ftnlen norm_len)
{
integer a_dim1, a_offset, i__1;
doublereal d__1;
doublereal sl;
integer ix;
doublereal su;
integer kase, kase1;
doublereal scale;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
integer isave[3];
extern int drscl_(integer *, doublereal *, doublereal *, integer *),
dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen);
doublereal ainvnm;
extern int dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen);
logical onenrm;
char normin[1];
doublereal smlnum;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--work;
--iwork;
*info = 0;
onenrm = *(unsigned char *)norm == '1' || lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1);
if (!onenrm && !lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
} else if (*anorm < 0.) {
*info = -5;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGECON", &i__1, (ftnlen)6);
return 0;
}
*rcond = 0.;
if (*n == 0) {
*rcond = 1.;
return 0;
} else if (*anorm == 0.) {
return 0;
}
smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12);
ainvnm = 0.;
*(unsigned char *)normin = 'N';
if (onenrm) {
kase1 = 1;
} else {
kase1 = 2;
}
kase = 0;
L10:
dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
if (kase != 0) {
if (kase == kase1) {
dlatrs_((char *)"Lower", (char *)"No transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
&work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)4, (ftnlen)1);
dlatrs_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1],
&su, &work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)12, (ftnlen)8, (ftnlen)1);
} else {
dlatrs_((char *)"Upper", (char *)"Transpose", (char *)"Non-unit", normin, n, &a[a_offset], lda, &work[1], &su,
&work[*n * 3 + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)8, (ftnlen)1);
dlatrs_((char *)"Lower", (char *)"Transpose", (char *)"Unit", normin, n, &a[a_offset], lda, &work[1], &sl,
&work[(*n << 1) + 1], info, (ftnlen)5, (ftnlen)9, (ftnlen)4, (ftnlen)1);
}
scale = sl * su;
*(unsigned char *)normin = 'Y';
if (scale != 1.) {
ix = idamax_(n, &work[1], &c__1);
if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) {
goto L20;
}
drscl_(n, &scale, &work[1], &c__1);
}
goto L10;
}
if (ainvnm != 0.) {
*rcond = 1. / ainvnm / *anorm;
}
L20:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,258 +0,0 @@
*> \brief \b DGECON
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGECON + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
* INFO )
*
* .. Scalar Arguments ..
* CHARACTER NORM
* INTEGER INFO, LDA, N
* DOUBLE PRECISION ANORM, RCOND
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGECON estimates the reciprocal of the condition number of a general
*> real matrix A, in either the 1-norm or the infinity-norm, using
*> the LU factorization computed by DGETRF.
*>
*> An estimate is obtained for norm(inv(A)), and the reciprocal of the
*> condition number is computed as
*> RCOND = 1 / ( norm(A) * norm(inv(A)) ).
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] NORM
*> \verbatim
*> NORM is CHARACTER*1
*> Specifies whether the 1-norm condition number or the
*> infinity-norm condition number is required:
*> = '1' or 'O': 1-norm;
*> = 'I': Infinity-norm.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The factors L and U from the factorization A = P*L*U
*> as computed by DGETRF.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] ANORM
*> \verbatim
*> ANORM is DOUBLE PRECISION
*> If NORM = '1' or 'O', the 1-norm of the original matrix A.
*> If NORM = 'I', the infinity-norm of the original matrix A.
*> \endverbatim
*>
*> \param[out] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> The reciprocal of the condition number of the matrix A,
*> computed as RCOND = 1/(norm(A) * norm(inv(A))).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*N)
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER NORM
INTEGER INFO, LDA, N
DOUBLE PRECISION ANORM, RCOND
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL ONENRM
CHARACTER NORMIN
INTEGER IX, KASE, KASE1
DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU
* ..
* .. Local Arrays ..
INTEGER ISAVE( 3 )
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER IDAMAX
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, IDAMAX, DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( ANORM.LT.ZERO ) THEN
INFO = -5
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGECON', -INFO )
RETURN
END IF
*
* Quick return if possible
*
RCOND = ZERO
IF( N.EQ.0 ) THEN
RCOND = ONE
RETURN
ELSE IF( ANORM.EQ.ZERO ) THEN
RETURN
END IF
*
SMLNUM = DLAMCH( 'Safe minimum' )
*
* Estimate the norm of inv(A).
*
AINVNM = ZERO
NORMIN = 'N'
IF( ONENRM ) THEN
KASE1 = 1
ELSE
KASE1 = 2
END IF
KASE = 0
10 CONTINUE
CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE )
IF( KASE.NE.0 ) THEN
IF( KASE.EQ.KASE1 ) THEN
*
* Multiply by inv(L).
*
CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
$ LDA, WORK, SL, WORK( 2*N+1 ), INFO )
*
* Multiply by inv(U).
*
CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
$ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
ELSE
*
* Multiply by inv(U**T).
*
CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
$ LDA, WORK, SU, WORK( 3*N+1 ), INFO )
*
* Multiply by inv(L**T).
*
CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
$ LDA, WORK, SL, WORK( 2*N+1 ), INFO )
END IF
*
* Divide X by 1/(SL*SU) if doing so will not cause overflow.
*
SCALE = SL*SU
NORMIN = 'Y'
IF( SCALE.NE.ONE ) THEN
IX = IDAMAX( N, WORK, 1 )
IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
$ GO TO 20
CALL DRSCL( N, SCALE, WORK, 1 )
END IF
GO TO 10
END IF
*
* Compute the estimate of the reciprocal condition number.
*
IF( AINVNM.NE.ZERO )
$ RCOND = ( ONE / AINVNM ) / ANORM
*
20 CONTINUE
RETURN
*
* End of DGECON
*
END

53
lib/linalg/dgelq2.cpp Normal file
View File

@ -0,0 +1,53 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgelq2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, k;
doublereal aii;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELQ2", &i__1, (ftnlen)6);
return 0;
}
k = min(*m, *n);
i__1 = k;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda, &tau[i__]);
if (i__ < *m) {
aii = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen)5);
a[i__ + i__ * a_dim1] = aii;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,197 +0,0 @@
*> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELQ2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELQ2 computes an LQ factorization of a real m-by-n matrix A:
*>
*> A = ( L 0 ) * Q
*>
*> where:
*>
*> Q is a n-by-n orthogonal matrix;
*> L is a lower-triangular m-by-m matrix;
*> 0 is a m-by-(n-m) zero matrix, if m < n.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix A.
*> On exit, the elements on and below the diagonal of the array
*> contain the m by min(m,n) lower trapezoidal matrix L (L is
*> lower triangular if m <= n); the elements above the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (M)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(k) . . . H(2) H(1), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQ2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAU( I ) )
IF( I.LT.M ) THEN
*
* Apply H(i) to A(i+1:m,i:n) from the right
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
$ A( I+1, I ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGELQ2
*
END

106
lib/linalg/dgelqf.cpp Normal file
View File

@ -0,0 +1,106 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
int dgelqf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
extern int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *),
dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen, ftnlen, ftnlen, ftnlen),
dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lwkopt = *m * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else if (*lwork < max(1, *m) && !lquery) {
*info = -7;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELQF", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
k = min(*m, *n);
if (k == 0) {
work[1] = 1.;
return 0;
}
nbmin = 2;
nx = 0;
iws = *m;
if (nb > 1 && nb < k) {
i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < k) {
ldwork = *m;
iws = ldwork * nb;
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
}
}
if (nb >= nbmin && nb < k && nx < k) {
i__1 = k - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = k - i__ + 1;
ib = min(i__3, nb);
i__3 = *n - i__ + 1;
dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
if (i__ + ib <= *m) {
i__3 = *n - i__ + 1;
dlarft_((char *)"Forward", (char *)"Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&work[1], &ldwork, (ftnlen)7, (ftnlen)7);
i__3 = *m - i__ - ib + 1;
i__4 = *n - i__ + 1;
dlarfb_((char *)"Right", (char *)"No transpose", (char *)"Forward", (char *)"Rowwise", &i__3, &i__4, &ib,
&a[i__ + i__ * a_dim1], lda, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1],
lda, &work[ib + 1], &ldwork, (ftnlen)5, (ftnlen)12, (ftnlen)7, (ftnlen)7);
}
}
} else {
i__ = 1;
}
if (i__ <= k) {
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,274 +0,0 @@
*> \brief \b DGELQF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELQF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELQF computes an LQ factorization of a real M-by-N matrix A:
*>
*> A = ( L 0 ) * Q
*>
*> where:
*>
*> Q is a N-by-N orthogonal matrix;
*> L is a lower-triangular M-by-M matrix;
*> 0 is a M-by-(N-M) zero matrix, if M < N.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the elements on and below the diagonal of the array
*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is
*> lower triangular if m <= n); the elements above the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= max(1,M).
*> For optimum performance LWORK >= M*NB, where NB is the
*> optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(k) . . . H(2) H(1), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
LWKOPT = M*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
K = MIN( M, N )
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = M
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = M
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the LQ factorization of the current block
* A(i:i+ib-1,i:n)
*
CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.M ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
$ LDA, TAU( I ), WORK, LDWORK )
*
* Apply H to A(i+ib:m,i:n) from the right
*
CALL DLARFB( 'Right', 'No transpose', 'Forward',
$ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
$ LDA, WORK, LDWORK, A( I+IB, I ), LDA,
$ WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGELQF
*
END

341
lib/linalg/dgelsd.cpp Normal file
View File

@ -0,0 +1,341 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__6 = 6;
static integer c_n1 = -1;
static integer c__9 = 9;
static integer c__0 = 0;
static integer c__1 = 1;
static doublereal c_b82 = 0.;
int dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
integer *lwork, integer *iwork, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
double log(doublereal);
integer ie, il, mm;
doublereal eps, anrm, bnrm;
integer itau, nlvl, iascl, ibscl;
doublereal sfmin;
integer minmn, maxmn, itaup, itauq, mnthr, nwork;
extern int dlabad_(doublereal *, doublereal *),
dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *, integer *);
extern doublereal dlamch_(char *, ftnlen),
dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlalsd_(char *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen),
dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
doublereal *, integer *, integer *, ftnlen),
dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
doublereal bignum;
extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *, ftnlen, ftnlen, ftnlen);
integer wlalsd;
extern int dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork;
extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer liwork, minwrk, maxwrk;
doublereal smlnum;
logical lquery;
integer smlsiz;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
--s;
--work;
--iwork;
*info = 0;
minmn = min(*m, *n);
maxmn = max(*m, *n);
mnthr = ilaenv_(&c__6, (char *)"DGELSD", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *m)) {
*info = -5;
} else if (*ldb < max(1, maxmn)) {
*info = -7;
}
smlsiz = ilaenv_(&c__9, (char *)"DGELSD", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
minwrk = 1;
liwork = 1;
minmn = max(1, minmn);
i__1 = (integer)(log((doublereal)minmn / (doublereal)(smlsiz + 1)) / log(2.)) + 1;
nlvl = max(i__1, 0);
if (*info == 0) {
maxwrk = 0;
liwork = minmn * 3 * nlvl + minmn * 11;
mm = *m;
if (*m >= *n && *m >= mnthr) {
mm = *n;
i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, (char *)"DORMQR", (char *)"LT", m, nrhs, n, &c_n1,
(ftnlen)6, (ftnlen)2);
maxwrk = max(i__1, i__2);
}
if (*m >= *n) {
i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", &mm, n, &c_n1,
&c_n1, (ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", &mm, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = smlsiz + 1;
wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *nrhs + i__1 * i__1;
i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
maxwrk = max(i__1, i__2);
i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2),
i__2 = *n * 3 + wlalsd;
minwrk = max(i__1, i__2);
}
if (*n > *m) {
i__1 = smlsiz + 1;
wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *nrhs + i__1 * i__1;
if (*n >= mnthr) {
maxwrk = *m + *m * ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6,
(ftnlen)1);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
(*m << 1) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, m, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
*nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) +
(*m - 1) * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", m, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
if (*nrhs > 1) {
i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
maxwrk = max(i__1, i__2);
} else {
i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
maxwrk = max(i__1, i__2);
}
i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, (char *)"DORMLQ", (char *)"LT", n, nrhs, m, &c_n1,
(ftnlen)6, (ftnlen)2);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
maxwrk = max(i__1, i__2);
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3, i__4);
maxwrk = max(i__1, i__2);
} else {
maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1,
(ftnlen)6, (ftnlen)1);
i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, n,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", n, nrhs, m,
&c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
maxwrk = max(i__1, i__2);
}
i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1, i__2),
i__2 = *m * 3 + wlalsd;
minwrk = max(i__1, i__2);
}
minwrk = min(minwrk, maxwrk);
work[1] = (doublereal)maxwrk;
iwork[1] = liwork;
if (*lwork < minwrk && !lquery) {
*info = -12;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELSD", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
goto L10;
}
if (*m == 0 || *n == 0) {
*rank = 0;
return 0;
}
eps = dlamch_((char *)"P", (ftnlen)1);
sfmin = dlamch_((char *)"S", (ftnlen)1);
smlnum = sfmin / eps;
bignum = 1. / smlnum;
dlabad_(&smlnum, &bignum);
anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
iascl = 0;
if (anrm > 0. && anrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 1;
} else if (anrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 2;
} else if (anrm == 0.) {
i__1 = max(*m, *n);
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb, (ftnlen)1);
dlaset_((char *)"F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1, (ftnlen)1);
*rank = 0;
goto L10;
}
bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
ibscl = 0;
if (bnrm > 0. && bnrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 1;
} else if (bnrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 2;
}
if (*m < *n) {
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
}
if (*m >= *n) {
mm = *m;
if (*m >= mnthr) {
mm = *n;
itau = 1;
nwork = itau + *n;
i__1 = *lwork - nwork + 1;
dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
if (*n > 1) {
i__1 = *n - 1;
i__2 = *n - 1;
dlaset_((char *)"L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], lda, (ftnlen)1);
}
}
ie = 1;
itauq = ie + *n;
itaup = itauq + *n;
nwork = itaup + *n;
i__1 = *lwork - nwork + 1;
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
} else {
i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1, i__2), i__1 = max(i__1, *nrhs),
i__2 = *n - *m * 3, i__1 = max(i__1, i__2);
if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1, wlalsd)) {
ldwork = *m;
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__1 = (*m << 2) + *m * *lda + max(i__3, i__4), i__2 = *m * *lda + *m + *m * *nrhs,
i__1 = max(i__1, i__2), i__2 = (*m << 2) + *m * *lda + wlalsd;
if (*lwork >= max(i__1, i__2)) {
ldwork = *lda;
}
itau = 1;
nwork = *m + 1;
i__1 = *lwork - nwork + 1;
dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info);
il = nwork;
dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
i__1 = *m - 1;
i__2 = *m - 1;
dlaset_((char *)"U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &ldwork, (ftnlen)1);
ie = il + ldwork * *m;
itauq = ie + *m;
itaup = itauq + *m;
nwork = itaup + *m;
i__1 = *lwork - nwork + 1;
dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", m, nrhs, m, &work[il], &ldwork, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
nwork = itau + *m;
i__1 = *lwork - nwork + 1;
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1);
} else {
ie = 1;
itauq = ie + *m;
itaup = itauq + *m;
nwork = itaup + *m;
i__1 = *lwork - nwork + 1;
dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[nwork], &i__1, info);
i__1 = *lwork - nwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
dlalsd_((char *)"L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank,
&work[nwork], &iwork[1], info, (ftnlen)1);
if (*info != 0) {
goto L10;
}
i__1 = *lwork - nwork + 1;
dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, m, &a[a_offset], lda, &work[itaup], &b[b_offset], ldb,
&work[nwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
}
}
if (iascl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
} else if (iascl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
}
if (ibscl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
} else if (ibscl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
}
L10:
work[1] = (doublereal)maxwrk;
iwork[1] = liwork;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,626 +0,0 @@
*> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELSD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
* WORK, LWORK, IWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
* DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELSD computes the minimum-norm solution to a real linear least
*> squares problem:
*> minimize 2-norm(| b - A*x |)
*> using the singular value decomposition (SVD) of A. A is an M-by-N
*> matrix which may be rank-deficient.
*>
*> Several right hand side vectors b and solution vectors x can be
*> handled in a single call; they are stored as the columns of the
*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
*> matrix X.
*>
*> The problem is solved in three steps:
*> (1) Reduce the coefficient matrix A to bidiagonal form with
*> Householder transformations, reducing the original problem
*> into a "bidiagonal least squares problem" (BLS)
*> (2) Solve the BLS using a divide and conquer approach.
*> (3) Apply back all the Householder transformations to solve
*> the original least squares problem.
*>
*> The effective rank of A is determined by treating as zero those
*> singular values which are less than RCOND times the largest singular
*> value.
*>
*> The divide and conquer algorithm makes very mild assumptions about
*> floating point arithmetic. It will work on machines with a guard
*> digit in add/subtract, or on those binary machines without guard
*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
*> without guard digits, but we know of none.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrices B and X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, A has been destroyed.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the M-by-NRHS right hand side matrix B.
*> On exit, B is overwritten by the N-by-NRHS solution
*> matrix X. If m >= n and RANK = n, the residual
*> sum-of-squares for the solution in the i-th column is given
*> by the sum of squares of elements n+1:m in that column.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,max(M,N)).
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (min(M,N))
*> The singular values of A in decreasing order.
*> The condition number of A in the 2-norm = S(1)/S(min(m,n)).
*> \endverbatim
*>
*> \param[in] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> RCOND is used to determine the effective rank of A.
*> Singular values S(i) <= RCOND*S(1) are treated as zero.
*> If RCOND < 0, machine precision is used instead.
*> \endverbatim
*>
*> \param[out] RANK
*> \verbatim
*> RANK is INTEGER
*> The effective rank of A, i.e., the number of singular values
*> which are greater than RCOND*S(1).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK must be at least 1.
*> The exact minimum amount of workspace needed depends on M,
*> N and NRHS. As long as LWORK is at least
*> 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
*> if M is greater than or equal to N or
*> 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
*> if M is less than N, the code will execute correctly.
*> SMLSIZ is returned by ILAENV and is equal to the maximum
*> size of the subproblems at the bottom of the computation
*> tree (usually about 25), and
*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
*> LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
*> where MINMN = MIN( M,N ).
*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: the algorithm for computing the SVD failed to converge;
*> if INFO = i, i off-diagonal elements of an intermediate
*> bidiagonal form did not converge to zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
*> \par Contributors:
* ==================
*>
*> Ming Gu and Ren-Cang Li, Computer Science Division, University of
*> California at Berkeley, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*
* =====================================================================
SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
$ WORK, LWORK, IWORK, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
$ LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
$ MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
* ..
* .. External Subroutines ..
EXTERNAL DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
$ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL ILAENV, DLAMCH, DLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, INT, LOG, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments.
*
INFO = 0
MINMN = MIN( M, N )
MAXMN = MAX( M, N )
MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
INFO = -7
END IF
*
SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
*
* Compute workspace.
* (Note: Comments in the code beginning "Workspace:" describe the
* minimal amount of workspace needed at that point in the code,
* as well as the preferred amount for good performance.
* NB refers to the optimal block size for the immediately
* following subroutine, as returned by ILAENV.)
*
MINWRK = 1
LIWORK = 1
MINMN = MAX( 1, MINMN )
NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
$ LOG( TWO ) ) + 1, 0 )
*
IF( INFO.EQ.0 ) THEN
MAXWRK = 0
LIWORK = 3*MINMN*NLVL + 11*MINMN
MM = M
IF( M.GE.N .AND. M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns.
*
MM = N
MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
$ -1, -1 ) )
MAXWRK = MAX( MAXWRK, N+NRHS*
$ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
END IF
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined.
*
MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
$ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
MAXWRK = MAX( MAXWRK, 3*N+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
$ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
END IF
IF( N.GT.M ) THEN
WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
IF( N.GE.MNTHR ) THEN
*
* Path 2a - underdetermined, with many more columns
* than rows.
*
MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
$ ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
IF( NRHS.GT.1 ) THEN
MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
ELSE
MAXWRK = MAX( MAXWRK, M*M+2*M )
END IF
MAXWRK = MAX( MAXWRK, M+NRHS*
$ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
! XXX: Ensure the Path 2a case below is triggered. The workspace
! calculation should use queries for all routines eventually.
MAXWRK = MAX( MAXWRK,
$ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
ELSE
*
* Path 2 - remaining underdetermined cases.
*
MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
$ -1, -1 )
MAXWRK = MAX( MAXWRK, 3*M+NRHS*
$ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
MAXWRK = MAX( MAXWRK, 3*M+M*
$ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
END IF
MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
END IF
MINWRK = MIN( MINWRK, MAXWRK )
WORK( 1 ) = MAXWRK
IWORK( 1 ) = LIWORK
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
INFO = -12
END IF
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELSD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
GO TO 10
END IF
*
* Quick return if possible.
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RANK = 0
RETURN
END IF
*
* Get machine parameters.
*
EPS = DLAMCH( 'P' )
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max entry outside range [SMLNUM,BIGNUM].
*
ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
IASCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM.
*
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
IASCL = 1
ELSE IF( ANRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM.
*
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
IASCL = 2
ELSE IF( ANRM.EQ.ZERO ) THEN
*
* Matrix all zero. Return zero solution.
*
CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
RANK = 0
GO TO 10
END IF
*
* Scale B if max entry outside range [SMLNUM,BIGNUM].
*
BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
IBSCL = 0
IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM.
*
CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
IBSCL = 1
ELSE IF( BNRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM.
*
CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
IBSCL = 2
END IF
*
* If M < N make sure certain entries of B are zero.
*
IF( M.LT.N )
$ CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
*
* Overdetermined case.
*
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined.
*
MM = M
IF( M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns.
*
MM = N
ITAU = 1
NWORK = ITAU + N
*
* Compute A=Q*R.
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Multiply B by transpose(Q).
* (Workspace: need N+NRHS, prefer N+NRHS*NB)
*
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Zero out below R.
*
IF( N.GT.1 ) THEN
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
END IF
END IF
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
* Bidiagonalize R in A.
* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
*
CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of R.
* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of R.
*
CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
$ MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
*
* Path 2a - underdetermined, with many more columns than rows
* and sufficient workspace for an efficient algorithm.
*
LDWORK = M
IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
$ M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
ITAU = 1
NWORK = M + 1
*
* Compute A=L*Q.
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
IL = NWORK
*
* Copy L to WORK(IL), zeroing out above its diagonal.
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
$ LDWORK )
IE = IL + LDWORK*M
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IL).
* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of L.
* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUQ ), B, LDB, WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of L.
*
CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUP ), B, LDB, WORK( NWORK ),
$ LWORK-NWORK+1, INFO )
*
* Zero out below first M rows of B.
*
CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
NWORK = ITAU + M
*
* Multiply transpose(Q) by B.
* (Workspace: need M+NRHS, prefer M+NRHS*NB)
*
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
ELSE
*
* Path 2 - remaining underdetermined cases.
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
* Bidiagonalize A.
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors.
* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
* Solve the bidiagonal least squares problem.
*
CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
$ RCOND, RANK, WORK( NWORK ), IWORK, INFO )
IF( INFO.NE.0 ) THEN
GO TO 10
END IF
*
* Multiply B by right bidiagonalizing vectors of A.
*
CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
$ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
*
END IF
*
* Undo scaling.
*
IF( IASCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
ELSE IF( IASCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
END IF
IF( IBSCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
ELSE IF( IBSCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
END IF
*
10 CONTINUE
WORK( 1 ) = MAXWRK
IWORK( 1 ) = LIWORK
RETURN
*
* End of DGELSD
*
END

466
lib/linalg/dgelss.cpp Normal file
View File

@ -0,0 +1,466 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__6 = 6;
static integer c_n1 = -1;
static integer c__0 = 0;
static doublereal c_b46 = 0.;
static integer c__1 = 1;
static doublereal c_b79 = 1.;
int dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b,
integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
doublereal d__1;
integer i__, bl, ie, il, mm;
doublereal dum[1], eps, thr, anrm, bnrm;
integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, lwork_dorgbr__, lwork_dormbr__,
lwork_dormlq__, lwork_dormqr__;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iascl, ibscl;
extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen),
drscl_(integer *, doublereal *, doublereal *, integer *);
integer chunk;
doublereal sfmin;
integer minmn;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer maxmn, itaup, itauq, mnthr, iwork;
extern int dlabad_(doublereal *, doublereal *),
dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *, integer *);
extern doublereal dlamch_(char *, ftnlen),
dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen);
integer bdspac;
extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *,
doublereal *, integer *, integer *, ftnlen),
dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen),
dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *,
doublereal *, integer *, doublereal *, integer *, doublereal *, integer *,
doublereal *, integer *, ftnlen),
dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, integer *, ftnlen);
doublereal bignum;
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, doublereal *, integer *,
integer *, ftnlen, ftnlen, ftnlen),
dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen,
ftnlen);
integer ldwork;
extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *,
doublereal *, doublereal *, integer *, doublereal *, integer *, integer *,
ftnlen, ftnlen);
integer minwrk, maxwrk;
doublereal smlnum;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
--s;
--work;
*info = 0;
minmn = min(*m, *n);
maxmn = max(*m, *n);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *m)) {
*info = -5;
} else if (*ldb < max(1, maxmn)) {
*info = -7;
}
if (*info == 0) {
minwrk = 1;
maxwrk = 1;
if (minmn > 0) {
mm = *m;
mnthr = ilaenv_(&c__6, (char *)"DGELSS", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, (ftnlen)1);
if (*m >= *n && *m >= mnthr) {
dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
lwork_dgeqrf__ = (integer)dum[0];
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum, &c_n1,
info, (ftnlen)1, (ftnlen)1);
lwork_dormqr__ = (integer)dum[0];
mm = *n;
i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n + lwork_dormqr__;
maxwrk = max(i__1, i__2);
}
if (*m >= *n) {
i__1 = 1, i__2 = *n * 5;
bdspac = max(i__1, i__2);
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
&c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
maxwrk = max(maxwrk, bdspac);
i__1 = maxwrk, i__2 = *n * *nrhs;
maxwrk = max(i__1, i__2);
i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2);
minwrk = max(i__1, bdspac);
maxwrk = max(minwrk, maxwrk);
}
if (*n > *m) {
i__1 = 1, i__2 = *m * 5;
bdspac = max(i__1, i__2);
i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, i__2);
minwrk = max(i__1, bdspac);
if (*n >= mnthr) {
dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
lwork_dgelqf__ = (integer)dum[0];
dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[b_offset], ldb,
dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb, dum,
&c_n1, info, (ftnlen)1, (ftnlen)1);
lwork_dormlq__ = (integer)dum[0];
maxwrk = *m + lwork_dgelqf__;
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dgebrd__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
maxwrk = max(i__1, i__2);
if (*nrhs > 1) {
i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
maxwrk = max(i__1, i__2);
} else {
i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
maxwrk = max(i__1, i__2);
}
i__1 = maxwrk, i__2 = *m + lwork_dormlq__;
maxwrk = max(i__1, i__2);
} else {
dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, info);
lwork_dgebrd__ = (integer)dum[0];
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &a[a_offset], lda, dum, &b[b_offset], ldb,
dum, &c_n1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
lwork_dormbr__ = (integer)dum[0];
dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, info, (ftnlen)1);
lwork_dorgbr__ = (integer)dum[0];
maxwrk = *m * 3 + lwork_dgebrd__;
i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__;
maxwrk = max(i__1, i__2);
i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__;
maxwrk = max(i__1, i__2);
maxwrk = max(maxwrk, bdspac);
i__1 = maxwrk, i__2 = *n * *nrhs;
maxwrk = max(i__1, i__2);
}
}
maxwrk = max(minwrk, maxwrk);
}
work[1] = (doublereal)maxwrk;
if (*lwork < minwrk && !lquery) {
*info = -12;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGELSS", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
if (*m == 0 || *n == 0) {
*rank = 0;
return 0;
}
eps = dlamch_((char *)"P", (ftnlen)1);
sfmin = dlamch_((char *)"S", (ftnlen)1);
smlnum = sfmin / eps;
bignum = 1. / smlnum;
dlabad_(&smlnum, &bignum);
anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1);
iascl = 0;
if (anrm > 0. && anrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 1;
} else if (anrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info, (ftnlen)1);
iascl = 2;
} else if (anrm == 0.) {
i__1 = max(*m, *n);
dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb, (ftnlen)1);
dlaset_((char *)"F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn, (ftnlen)1);
*rank = 0;
goto L70;
}
bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1);
ibscl = 0;
if (bnrm > 0. && bnrm < smlnum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 1;
} else if (bnrm > bignum) {
dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
ibscl = 2;
}
if (*m >= *n) {
mm = *m;
if (*m >= mnthr) {
mm = *n;
itau = 1;
iwork = itau + *n;
i__1 = *lwork - iwork + 1;
dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
if (*n > 1) {
i__1 = *n - 1;
i__2 = *n - 1;
dlaset_((char *)"L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], lda, (ftnlen)1);
}
}
ie = 1;
itauq = ie + *n;
itaup = itauq + *n;
iwork = itaup + *n;
i__1 = *lwork - iwork + 1;
dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *lwork - iwork + 1;
dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
(ftnlen)1);
iwork = ie + *n;
dbdsqr_((char *)"U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
&work[1], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = *lwork / *n;
i__1 = *nrhs;
i__2 = chunk;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1], ldb,
&c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
&c__1, (ftnlen)1);
dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
}
} else {
i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2, i__1), i__2 = max(i__2, *nrhs),
i__1 = *n - *m * 3;
if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2, i__1)) {
ldwork = *m;
i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3, i__4), i__3 = max(i__3, *nrhs),
i__4 = *n - *m * 3;
i__2 = (*m << 2) + *m * *lda + max(i__3, i__4), i__1 = *m * *lda + *m + *m * *nrhs;
if (*lwork >= max(i__2, i__1)) {
ldwork = *lda;
}
itau = 1;
iwork = *m + 1;
i__2 = *lwork - iwork + 1;
dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info);
il = iwork;
dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen)1);
i__2 = *m - 1;
i__1 = *m - 1;
dlaset_((char *)"U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], &ldwork, (ftnlen)1);
ie = il + ldwork * *m;
itauq = ie + *m;
itaup = itauq + *m;
iwork = itaup + *m;
i__2 = *lwork - iwork + 1;
dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__2, info);
i__2 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__2, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__2 = *lwork - iwork + 1;
dorgbr_((char *)"P", m, m, m, &work[il], &ldwork, &work[itaup], &work[iwork], &i__2, info,
(ftnlen)1);
iwork = ie + *m;
dbdsqr_((char *)"U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &ldwork, &a[a_offset], lda,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
iwork = ie;
if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[b_offset], ldb, &c_b46,
&work[iwork], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = (*lwork - iwork + 1) / *m;
i__2 = *nrhs;
i__1 = chunk;
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", m, &bl, m, &c_b79, &work[il], &ldwork, &b[i__ * b_dim1 + 1],
ldb, &c_b46, &work[iwork], m, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], &c__1, &c_b46,
&work[iwork], &c__1, (ftnlen)1);
dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
}
i__1 = *n - *m;
dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], ldb, (ftnlen)1);
iwork = itau + *m;
i__1 = *lwork - iwork + 1;
dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1);
} else {
ie = 1;
itauq = ie + *m;
itaup = itauq + *m;
iwork = itaup + *m;
i__1 = *lwork - iwork + 1;
dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup],
&work[iwork], &i__1, info);
i__1 = *lwork - iwork + 1;
dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb,
&work[iwork], &i__1, info, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *lwork - iwork + 1;
dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[iwork], &i__1, info,
(ftnlen)1);
iwork = ie + *m;
dbdsqr_((char *)"L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, dum, &c__1,
&b[b_offset], ldb, &work[iwork], info, (ftnlen)1);
if (*info != 0) {
goto L70;
}
d__1 = *rcond * s[1];
thr = max(d__1, sfmin);
if (*rcond < 0.) {
d__1 = eps * s[1];
thr = max(d__1, sfmin);
}
*rank = 0;
i__1 = *m;
for (i__ = 1; i__ <= i__1; ++i__) {
if (s[i__] > thr) {
drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
++(*rank);
} else {
dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], ldb, (ftnlen)1);
}
}
if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
dgemm_((char *)"T", (char *)"N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[b_offset], ldb, &c_b46,
&work[1], ldb, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"F", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1);
} else if (*nrhs > 1) {
chunk = *lwork / *n;
i__1 = *nrhs;
i__2 = chunk;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = *nrhs - i__ + 1;
bl = min(i__3, chunk);
dgemm_((char *)"T", (char *)"N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[i__ * b_dim1 + 1],
ldb, &c_b46, &work[1], n, (ftnlen)1, (ftnlen)1);
dlacpy_((char *)"F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, (ftnlen)1);
}
} else {
dgemv_((char *)"T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, &c_b46, &work[1],
&c__1, (ftnlen)1);
dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
}
}
}
if (iascl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
} else if (iascl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, info, (ftnlen)1);
}
if (ibscl == 1) {
dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
} else if (ibscl == 2) {
dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info, (ftnlen)1);
}
L70:
work[1] = (doublereal)maxwrk;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,744 +0,0 @@
*> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGELSS + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
* WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
* DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGELSS computes the minimum norm solution to a real linear least
*> squares problem:
*>
*> Minimize 2-norm(| b - A*x |).
*>
*> using the singular value decomposition (SVD) of A. A is an M-by-N
*> matrix which may be rank-deficient.
*>
*> Several right hand side vectors b and solution vectors x can be
*> handled in a single call; they are stored as the columns of the
*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
*> X.
*>
*> The effective rank of A is determined by treating as zero those
*> singular values which are less than RCOND times the largest singular
*> value.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrices B and X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the first min(m,n) rows of A are overwritten with
*> its right singular vectors, stored rowwise.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the M-by-NRHS right hand side matrix B.
*> On exit, B is overwritten by the N-by-NRHS solution
*> matrix X. If m >= n and RANK = n, the residual
*> sum-of-squares for the solution in the i-th column is given
*> by the sum of squares of elements n+1:m in that column.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,max(M,N)).
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (min(M,N))
*> The singular values of A in decreasing order.
*> The condition number of A in the 2-norm = S(1)/S(min(m,n)).
*> \endverbatim
*>
*> \param[in] RCOND
*> \verbatim
*> RCOND is DOUBLE PRECISION
*> RCOND is used to determine the effective rank of A.
*> Singular values S(i) <= RCOND*S(1) are treated as zero.
*> If RCOND < 0, machine precision is used instead.
*> \endverbatim
*>
*> \param[out] RANK
*> \verbatim
*> RANK is INTEGER
*> The effective rank of A, i.e., the number of singular values
*> which are greater than RCOND*S(1).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= 1, and also:
*> LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: the algorithm for computing the SVD failed to converge;
*> if INFO = i, i off-diagonal elements of an intermediate
*> bidiagonal form did not converge to zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
* =====================================================================
SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
$ WORK, LWORK, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
DOUBLE PRECISION RCOND
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
$ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
$ MAXWRK, MINMN, MINWRK, MM, MNTHR
INTEGER LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
$ LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
$ LWORK_DGELQF
DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
* ..
* .. Local Arrays ..
DOUBLE PRECISION DUM( 1 )
* ..
* .. External Subroutines ..
EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
$ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
$ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL ILAENV, DLAMCH, DLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
MINMN = MIN( M, N )
MAXMN = MAX( M, N )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
INFO = -7
END IF
*
* Compute workspace
* (Note: Comments in the code beginning "Workspace:" describe the
* minimal amount of workspace needed at that point in the code,
* as well as the preferred amount for good performance.
* NB refers to the optimal block size for the immediately
* following subroutine, as returned by ILAENV.)
*
IF( INFO.EQ.0 ) THEN
MINWRK = 1
MAXWRK = 1
IF( MINMN.GT.0 ) THEN
MM = M
MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
IF( M.GE.N .AND. M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than
* columns
*
* Compute space needed for DGEQRF
CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
LWORK_DGEQRF = INT( DUM(1) )
* Compute space needed for DORMQR
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
$ LDB, DUM(1), -1, INFO )
LWORK_DORMQR = INT( DUM(1) )
MM = N
MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
END IF
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined
*
* Compute workspace needed for DBDSQR
*
BDSPAC = MAX( 1, 5*N )
* Compute space needed for DGEBRD
CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
$ B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
* Compute total workspace needed
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, BDSPAC )
MAXWRK = MAX( MAXWRK, N*NRHS )
MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
MAXWRK = MAX( MINWRK, MAXWRK )
END IF
IF( N.GT.M ) THEN
*
* Compute workspace needed for DBDSQR
*
BDSPAC = MAX( 1, 5*M )
MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
IF( N.GE.MNTHR ) THEN
*
* Path 2a - underdetermined, with many more columns
* than rows
*
* Compute space needed for DGELQF
CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
$ -1, INFO )
LWORK_DGELQF = INT( DUM(1) )
* Compute space needed for DGEBRD
CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
$ DUM(1), B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
* Compute space needed for DORMLQ
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
$ B, LDB, DUM(1), -1, INFO )
LWORK_DORMLQ = INT( DUM(1) )
* Compute total workspace needed
MAXWRK = M + LWORK_DGELQF
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
IF( NRHS.GT.1 ) THEN
MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
ELSE
MAXWRK = MAX( MAXWRK, M*M + 2*M )
END IF
MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
ELSE
*
* Path 2 - underdetermined
*
* Compute space needed for DGEBRD
CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, INFO )
LWORK_DGEBRD = INT( DUM(1) )
* Compute space needed for DORMBR
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
$ DUM(1), B, LDB, DUM(1), -1, INFO )
LWORK_DORMBR = INT( DUM(1) )
* Compute space needed for DORGBR
CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
$ DUM(1), -1, INFO )
LWORK_DORGBR = INT( DUM(1) )
MAXWRK = 3*M + LWORK_DGEBRD
MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
MAXWRK = MAX( MAXWRK, BDSPAC )
MAXWRK = MAX( MAXWRK, N*NRHS )
END IF
END IF
MAXWRK = MAX( MINWRK, MAXWRK )
END IF
WORK( 1 ) = MAXWRK
*
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
$ INFO = -12
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELSS', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RANK = 0
RETURN
END IF
*
* Get machine parameters
*
EPS = DLAMCH( 'P' )
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
IASCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM
*
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
IASCL = 1
ELSE IF( ANRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM
*
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
IASCL = 2
ELSE IF( ANRM.EQ.ZERO ) THEN
*
* Matrix all zero. Return zero solution.
*
CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
RANK = 0
GO TO 70
END IF
*
* Scale B if max element outside range [SMLNUM,BIGNUM]
*
BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
IBSCL = 0
IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
*
* Scale matrix norm up to SMLNUM
*
CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
IBSCL = 1
ELSE IF( BNRM.GT.BIGNUM ) THEN
*
* Scale matrix norm down to BIGNUM
*
CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
IBSCL = 2
END IF
*
* Overdetermined case
*
IF( M.GE.N ) THEN
*
* Path 1 - overdetermined or exactly determined
*
MM = M
IF( M.GE.MNTHR ) THEN
*
* Path 1a - overdetermined, with many more rows than columns
*
MM = N
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Multiply B by transpose(Q)
* (Workspace: need N+NRHS, prefer N+NRHS*NB)
*
CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Zero out below R
*
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
END IF
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in A
* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
*
CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of R
* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors of R in A
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + N
*
* Perform bidiagonal QR iteration
* multiply B by transpose of left singular vectors
* compute right singular vectors in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
$ 1, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 10 I = 1, N
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
10 CONTINUE
*
* Multiply B by right singular vectors
* (Workspace: need N, prefer N*NRHS)
*
IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
$ WORK, LDB )
CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = LWORK / N
DO 20 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF
*
ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
$ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
*
* Path 2a - underdetermined, with many more columns than rows
* and sufficient workspace for an efficient algorithm
*
LDWORK = M
IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
$ M*LDA+M+M*NRHS ) )LDWORK = LDA
ITAU = 1
IWORK = M + 1
*
* Compute A=L*Q
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
IL = IWORK
*
* Copy L to WORK(IL), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
$ LDWORK )
IE = IL + LDWORK*M
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IL)
* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors of L
* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
$ WORK( ITAUQ ), B, LDB, WORK( IWORK ),
$ LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors of R in WORK(IL)
* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + M
*
* Perform bidiagonal QR iteration,
* computing right singular vectors of L in WORK(IL) and
* multiplying B by transpose of left singular vectors
* (Workspace: need M*M+M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
$ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 30 I = 1, M
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
30 CONTINUE
IWORK = IE
*
* Multiply B by right singular vectors of L in WORK(IL)
* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
*
IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
$ B, LDB, ZERO, WORK( IWORK ), LDB )
CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = ( LWORK-IWORK+1 ) / M
DO 40 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
$ B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, ZERO, WORK( IWORK ), 1 )
CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
END IF
*
* Zero out below first M rows of B
*
CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
IWORK = ITAU + M
*
* Multiply transpose(Q) by B
* (Workspace: need M+NRHS, prefer M+NRHS*NB)
*
CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
$ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
ELSE
*
* Path 2 - remaining underdetermined cases
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize A
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ INFO )
*
* Multiply B by transpose of left bidiagonalizing vectors
* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
*
CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
$ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
*
* Generate right bidiagonalizing vectors in A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, INFO )
IWORK = IE + M
*
* Perform bidiagonal QR iteration,
* computing right singular vectors of A in A and
* multiplying B by transpose of left singular vectors
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
$ 1, B, LDB, WORK( IWORK ), INFO )
IF( INFO.NE.0 )
$ GO TO 70
*
* Multiply B by reciprocals of singular values
*
THR = MAX( RCOND*S( 1 ), SFMIN )
IF( RCOND.LT.ZERO )
$ THR = MAX( EPS*S( 1 ), SFMIN )
RANK = 0
DO 50 I = 1, M
IF( S( I ).GT.THR ) THEN
CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
RANK = RANK + 1
ELSE
CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
END IF
50 CONTINUE
*
* Multiply B by right singular vectors of A
* (Workspace: need N, prefer N*NRHS)
*
IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
$ WORK, LDB )
CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
ELSE IF( NRHS.GT.1 ) THEN
CHUNK = LWORK / N
DO 60 I = 1, NRHS, CHUNK
BL = MIN( NRHS-I+1, CHUNK )
CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF
END IF
*
* Undo scaling
*
IF( IASCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
ELSE IF( IASCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
$ INFO )
END IF
IF( IBSCL.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
ELSE IF( IBSCL.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
END IF
*
70 CONTINUE
WORK( 1 ) = MAXWRK
RETURN
*
* End of DGELSS
*
END

173
lib/linalg/dgemm.cpp Normal file
View File

@ -0,0 +1,173 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgemm_(char *transa, char *transb, integer *m, integer *n, integer *k, doublereal *alpha,
doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta,
doublereal *c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3;
integer i__, j, l, info;
logical nota, notb;
doublereal temp;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
integer nrowa, nrowb;
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
c_dim1 = *ldc;
c_offset = 1 + c_dim1;
c__ -= c_offset;
nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1);
notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1);
if (nota) {
nrowa = *m;
} else {
nrowa = *k;
}
if (notb) {
nrowb = *k;
} else {
nrowb = *n;
}
info = 0;
if (!nota && !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1) &&
!lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1)) {
info = 1;
} else if (!notb && !lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1) &&
!lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen)1)) {
info = 2;
} else if (*m < 0) {
info = 3;
} else if (*n < 0) {
info = 4;
} else if (*k < 0) {
info = 5;
} else if (*lda < max(1, nrowa)) {
info = 8;
} else if (*ldb < max(1, nrowb)) {
info = 10;
} else if (*ldc < max(1, *m)) {
info = 13;
}
if (info != 0) {
xerbla_((char *)"DGEMM ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
return 0;
}
if (*alpha == 0.) {
if (*beta == 0.) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
}
return 0;
}
if (notb) {
if (nota) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (*beta == 0.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
} else if (*beta != 1.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
i__2 = *k;
for (l = 1; l <= i__2; ++l) {
temp = *alpha * b[l + j * b_dim1];
i__3 = *m;
for (i__ = 1; i__ <= i__3; ++i__) {
c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
}
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp = 0.;
i__3 = *k;
for (l = 1; l <= i__3; ++l) {
temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
}
if (*beta == 0.) {
c__[i__ + j * c_dim1] = *alpha * temp;
} else {
c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
}
}
}
}
} else {
if (nota) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (*beta == 0.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = 0.;
}
} else if (*beta != 1.) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
}
}
i__2 = *k;
for (l = 1; l <= i__2; ++l) {
temp = *alpha * b[j + l * b_dim1];
i__3 = *m;
for (i__ = 1; i__ <= i__3; ++i__) {
c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1];
}
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp = 0.;
i__3 = *k;
for (l = 1; l <= i__3; ++l) {
temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
}
if (*beta == 0.) {
c__[i__ + j * c_dim1] = *alpha * temp;
} else {
c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[i__ + j * c_dim1];
}
}
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,379 +0,0 @@
*> \brief \b DGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**T.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**T.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is DOUBLE PRECISION.
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is DOUBLE PRECISION array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,J,L,NROWA,NROWB
LOGICAL NOTA,NOTB
* ..
* .. Parameters ..
DOUBLE PRECISION ONE,ZERO
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* transposed and set NROWA and NROWB as the number of rows of A
* and B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
IF (NOTA) THEN
NROWA = M
ELSE
NROWA = K
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And if alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
END IF
ELSE
IF (NOTA) THEN
*
* Form C := alpha*A*B**T + beta*C
*
DO 170 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 130 I = 1,M
C(I,J) = ZERO
130 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 140 I = 1,M
C(I,J) = BETA*C(I,J)
140 CONTINUE
END IF
DO 160 L = 1,K
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
160 CONTINUE
170 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 200 J = 1,N
DO 190 I = 1,M
TEMP = ZERO
DO 180 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
180 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
190 CONTINUE
200 CONTINUE
END IF
END IF
*
RETURN
*
* End of DGEMM
*
END

149
lib/linalg/dgemv.cpp Normal file
View File

@ -0,0 +1,149 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgemv_(char *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda,
doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy,
ftnlen trans_len)
{
integer a_dim1, a_offset, i__1, i__2;
integer i__, j, ix, iy, jx, jy, kx, ky, info;
doublereal temp;
integer lenx, leny;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--x;
--y;
info = 0;
if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
!lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
info = 1;
} else if (*m < 0) {
info = 2;
} else if (*n < 0) {
info = 3;
} else if (*lda < max(1, *m)) {
info = 6;
} else if (*incx == 0) {
info = 8;
} else if (*incy == 0) {
info = 11;
}
if (info != 0) {
xerbla_((char *)"DGEMV ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
return 0;
}
if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
lenx = *n;
leny = *m;
} else {
lenx = *m;
leny = *n;
}
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (lenx - 1) * *incx;
}
if (*incy > 0) {
ky = 1;
} else {
ky = 1 - (leny - 1) * *incy;
}
if (*beta != 1.) {
if (*incy == 1) {
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = 0.;
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = *beta * y[i__];
}
}
} else {
iy = ky;
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = 0.;
iy += *incy;
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = *beta * y[iy];
iy += *incy;
}
}
}
}
if (*alpha == 0.) {
return 0;
}
if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) {
jx = kx;
if (*incy == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = *alpha * x[jx];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[i__] += temp * a[i__ + j * a_dim1];
}
jx += *incx;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = *alpha * x[jx];
iy = ky;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[iy] += temp * a[i__ + j * a_dim1];
iy += *incy;
}
jx += *incx;
}
}
} else {
jy = ky;
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[i__];
}
y[jy] += *alpha * temp;
jy += *incy;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[ix];
ix += *incx;
}
y[jy] += *alpha * temp;
jy += *incy;
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,327 +0,0 @@
*> \brief \b DGEMV
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA,BETA
* INTEGER INCX,INCY,LDA,M,N
* CHARACTER TRANS
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEMV performs one of the matrix-vector operations
*>
*> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
*>
*> where alpha and beta are scalars, x and y are vectors and A is an
*> m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> On entry, TRANS specifies the operation to be performed as
*> follows:
*>
*> TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
*>
*> TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
*>
*> TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix A.
*> M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix A.
*> N must be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, N )
*> Before entry, the leading m by n part of the array A must
*> contain the matrix of coefficients.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. LDA must be at least
*> max( 1, m ).
*> \endverbatim
*>
*> \param[in] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
*> and at least
*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
*> Before entry, the incremented array X must contain the
*> vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> On entry, INCX specifies the increment for the elements of
*> X. INCX must not be zero.
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is DOUBLE PRECISION.
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then Y need not be set on input.
*> \endverbatim
*>
*> \param[in,out] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
*> and at least
*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
*> Before entry with BETA non-zero, the incremented array Y
*> must contain the vector y. On exit, Y is overwritten by the
*> updated vector y.
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> On entry, INCY specifies the increment for the elements of
*> Y. INCY must not be zero.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level2
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 2 Blas routine.
*> The vector and matrix arguments are not referenced when N = 0, or M = 0
*>
*> -- Written on 22-October-1986.
*> Jack Dongarra, Argonne National Lab.
*> Jeremy Du Croz, Nag Central Office.
*> Sven Hammarling, Nag Central Office.
*> Richard Hanson, Sandia National Labs.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
INTEGER INCX,INCY,LDA,M,N
CHARACTER TRANS
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE,ZERO
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
*
* Test the input parameters.
*
INFO = 0
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
+ .NOT.LSAME(TRANS,'C')) THEN
INFO = 1
ELSE IF (M.LT.0) THEN
INFO = 2
ELSE IF (N.LT.0) THEN
INFO = 3
ELSE IF (LDA.LT.MAX(1,M)) THEN
INFO = 6
ELSE IF (INCX.EQ.0) THEN
INFO = 8
ELSE IF (INCY.EQ.0) THEN
INFO = 11
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGEMV ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
*
* Set LENX and LENY, the lengths of the vectors x and y, and set
* up the start points in X and Y.
*
IF (LSAME(TRANS,'N')) THEN
LENX = N
LENY = M
ELSE
LENX = M
LENY = N
END IF
IF (INCX.GT.0) THEN
KX = 1
ELSE
KX = 1 - (LENX-1)*INCX
END IF
IF (INCY.GT.0) THEN
KY = 1
ELSE
KY = 1 - (LENY-1)*INCY
END IF
*
* Start the operations. In this version the elements of A are
* accessed sequentially with one pass through A.
*
* First form y := beta*y.
*
IF (BETA.NE.ONE) THEN
IF (INCY.EQ.1) THEN
IF (BETA.EQ.ZERO) THEN
DO 10 I = 1,LENY
Y(I) = ZERO
10 CONTINUE
ELSE
DO 20 I = 1,LENY
Y(I) = BETA*Y(I)
20 CONTINUE
END IF
ELSE
IY = KY
IF (BETA.EQ.ZERO) THEN
DO 30 I = 1,LENY
Y(IY) = ZERO
IY = IY + INCY
30 CONTINUE
ELSE
DO 40 I = 1,LENY
Y(IY) = BETA*Y(IY)
IY = IY + INCY
40 CONTINUE
END IF
END IF
END IF
IF (ALPHA.EQ.ZERO) RETURN
IF (LSAME(TRANS,'N')) THEN
*
* Form y := alpha*A*x + y.
*
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
JX = JX + INCX
80 CONTINUE
END IF
ELSE
*
* Form y := alpha*A**T*x + y.
*
JY = KY
IF (INCX.EQ.1) THEN
DO 100 J = 1,N
TEMP = ZERO
DO 90 I = 1,M
TEMP = TEMP + A(I,J)*X(I)
90 CONTINUE
Y(JY) = Y(JY) + ALPHA*TEMP
JY = JY + INCY
100 CONTINUE
ELSE
DO 120 J = 1,N
TEMP = ZERO
IX = KX
DO 110 I = 1,M
TEMP = TEMP + A(I,J)*X(IX)
IX = IX + INCX
110 CONTINUE
Y(JY) = Y(JY) + ALPHA*TEMP
JY = JY + INCY
120 CONTINUE
END IF
END IF
*
RETURN
*
* End of DGEMV
*
END

54
lib/linalg/dgeqr2.cpp Normal file
View File

@ -0,0 +1,54 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dgeqr2_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, k;
doublereal aii;
extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *),
xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGEQR2", &i__1, (ftnlen)6);
return 0;
}
k = min(*m, *n);
i__1 = k;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1, &tau[i__]);
if (i__ < *n) {
aii = a[i__ + i__ * a_dim1];
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[i__],
&a[i__ + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4);
a[i__ + i__ * a_dim1] = aii;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,198 +0,0 @@
*> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEQR2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEQR2 computes a QR factorization of a real m-by-n matrix A:
*>
*> A = Q * ( R ),
*> ( 0 )
*>
*> where:
*>
*> Q is a m-by-m orthogonal matrix;
*> R is an upper-triangular n-by-n matrix;
*> 0 is a (m-n)-by-n zero matrix, if m > n.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix A.
*> On exit, the elements on and above the diagonal of the array
*> contain the min(m,n) by n upper trapezoidal matrix R (R is
*> upper triangular if m >= n); the elements below the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of elementary reflectors (see Further Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(1) H(2) . . . H(k), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQR2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAU( I ) )
IF( I.LT.N ) THEN
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGEQR2
*
END

113
lib/linalg/dgeqrf.cpp Normal file
View File

@ -0,0 +1,113 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__3 = 3;
static integer c__2 = 2;
int dgeqrf_(integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
extern int dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *),
dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen, ftnlen, ftnlen, ftnlen),
dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork, lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--tau;
--work;
k = min(*m, *n);
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
} else if (!lquery) {
if (*lwork <= 0 || *m > 0 && *lwork < max(1, *n)) {
*info = -7;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGEQRF", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
if (k == 0) {
lwkopt = 1;
} else {
lwkopt = *n * nb;
}
work[1] = (doublereal)lwkopt;
return 0;
}
if (k == 0) {
work[1] = 1.;
return 0;
}
nbmin = 2;
nx = 0;
iws = *n;
if (nb > 1 && nb < k) {
i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nx = max(i__1, i__2);
if (nx < k) {
ldwork = *n;
iws = ldwork * nb;
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
}
}
if (nb >= nbmin && nb < k && nx < k) {
i__1 = k - nx;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = k - i__ + 1;
ib = min(i__3, nb);
i__3 = *m - i__ + 1;
dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
if (i__ + ib <= *n) {
i__3 = *m - i__ + 1;
dlarft_((char *)"Forward", (char *)"Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__],
&work[1], &ldwork, (ftnlen)7, (ftnlen)10);
i__3 = *m - i__ + 1;
i__4 = *n - i__ - ib + 1;
dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib,
&a[i__ + i__ * a_dim1], lda, &work[1], &ldwork,
&a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork, (ftnlen)4,
(ftnlen)9, (ftnlen)7, (ftnlen)10);
}
}
} else {
i__ = 1;
}
if (i__ <= k) {
i__2 = *m - i__ + 1;
i__1 = *n - i__ + 1;
dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &iinfo);
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,282 +0,0 @@
*> \brief \b DGEQRF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGEQRF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEQRF computes a QR factorization of a real M-by-N matrix A:
*>
*> A = Q * ( R ),
*> ( 0 )
*>
*> where:
*>
*> Q is a M-by-M orthogonal matrix;
*> R is an upper-triangular N-by-N matrix;
*> 0 is a (M-N)-by-N zero matrix, if M > N.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix A.
*> On exit, the elements on and above the diagonal of the array
*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is
*> upper triangular if m >= n); the elements below the diagonal,
*> with the array TAU, represent the orthogonal matrix Q as a
*> product of min(m,n) elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION array, dimension (min(M,N))
*> The scalar factors of the elementary reflectors (see Further
*> Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK.
*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise.
*> For optimum performance LWORK >= N*NB, where NB is
*> the optimal blocksize.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrix Q is represented as a product of elementary reflectors
*>
*> Q = H(1) H(2) . . . H(k), where k = min(m,n).
*>
*> Each H(i) has the form
*>
*> H(i) = I - tau * v * v**T
*>
*> where tau is a real scalar, and v is a real vector with
*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
*> and tau in TAU(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
K = MIN( M, N )
INFO = 0
NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( .NOT.LQUERY ) THEN
IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
$ INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQRF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
IF( K.EQ.0 ) THEN
LWKOPT = 1
ELSE
LWKOPT = N*NB
END IF
WORK( 1 ) = LWKOPT
RETURN
END IF
*
* Quick return if possible
*
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = N
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = N
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the QR factorization of the current block
* A(i:m,i:i+ib-1)
*
CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.N ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
$ A( I, I ), LDA, TAU( I ), WORK, LDWORK )
*
* Apply H**T to A(i:m,i+ib:n) from the left
*
CALL DLARFB( 'Left', 'Transpose', 'Forward',
$ 'Columnwise', M-I+1, N-I-IB+1, IB,
$ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
$ LDA, WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGEQRF
*
END

77
lib/linalg/dger.cpp Normal file
View File

@ -0,0 +1,77 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y,
integer *incy, doublereal *a, integer *lda)
{
integer a_dim1, a_offset, i__1, i__2;
integer i__, j, ix, jy, kx, info;
doublereal temp;
extern int xerbla_(char *, integer *, ftnlen);
--x;
--y;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
info = 0;
if (*m < 0) {
info = 1;
} else if (*n < 0) {
info = 2;
} else if (*incx == 0) {
info = 5;
} else if (*incy == 0) {
info = 7;
} else if (*lda < max(1, *m)) {
info = 9;
}
if (info != 0) {
xerbla_((char *)"DGER ", &info, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0 || *alpha == 0.) {
return 0;
}
if (*incy > 0) {
jy = 1;
} else {
jy = 1 - (*n - 1) * *incy;
}
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[i__] * temp;
}
}
jy += *incy;
}
} else {
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (*m - 1) * *incx;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[ix] * temp;
ix += *incx;
}
}
jy += *incy;
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,224 +0,0 @@
*> \brief \b DGER
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA
* INTEGER INCX,INCY,LDA,M,N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGER performs the rank 1 operation
*>
*> A := alpha*x*y**T + A,
*>
*> where alpha is a scalar, x is an m element vector, y is an n element
*> vector and A is an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix A.
*> M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix A.
*> N must be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( m - 1 )*abs( INCX ) ).
*> Before entry, the incremented array X must contain the m
*> element vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> On entry, INCX specifies the increment for the elements of
*> X. INCX must not be zero.
*> \endverbatim
*>
*> \param[in] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension at least
*> ( 1 + ( n - 1 )*abs( INCY ) ).
*> Before entry, the incremented array Y must contain the n
*> element vector y.
*> \endverbatim
*>
*> \param[in] INCY
*> \verbatim
*> INCY is INTEGER
*> On entry, INCY specifies the increment for the elements of
*> Y. INCY must not be zero.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, N )
*> Before entry, the leading m by n part of the array A must
*> contain the matrix of coefficients. On exit, A is
*> overwritten by the updated matrix.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. LDA must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_blas_level2
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 2 Blas routine.
*>
*> -- Written on 22-October-1986.
*> Jack Dongarra, Argonne National Lab.
*> Jeremy Du Croz, Nag Central Office.
*> Sven Hammarling, Nag Central Office.
*> Richard Hanson, Sandia National Labs.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
*
* -- Reference BLAS level2 routine --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA
INTEGER INCX,INCY,LDA,M,N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER (ZERO=0.0D+0)
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,IX,J,JY,KX
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
*
* Test the input parameters.
*
INFO = 0
IF (M.LT.0) THEN
INFO = 1
ELSE IF (N.LT.0) THEN
INFO = 2
ELSE IF (INCX.EQ.0) THEN
INFO = 5
ELSE IF (INCY.EQ.0) THEN
INFO = 7
ELSE IF (LDA.LT.MAX(1,M)) THEN
INFO = 9
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGER ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
*
* Start the operations. In this version the elements of A are
* accessed sequentially with one pass through A.
*
IF (INCY.GT.0) THEN
JY = 1
ELSE
JY = 1 - (N-1)*INCY
END IF
IF (INCX.EQ.1) THEN
DO 20 J = 1,N
IF (Y(JY).NE.ZERO) THEN
TEMP = ALPHA*Y(JY)
DO 10 I = 1,M
A(I,J) = A(I,J) + X(I)*TEMP
10 CONTINUE
END IF
JY = JY + INCY
20 CONTINUE
ELSE
IF (INCX.GT.0) THEN
KX = 1
ELSE
KX = 1 - (M-1)*INCX
END IF
DO 40 J = 1,N
IF (Y(JY).NE.ZERO) THEN
TEMP = ALPHA*Y(JY)
IX = KX
DO 30 I = 1,M
A(I,J) = A(I,J) + X(IX)*TEMP
IX = IX + INCX
30 CONTINUE
END IF
JY = JY + INCY
40 CONTINUE
END IF
*
RETURN
*
* End of DGER
*
END

44
lib/linalg/dgesv.cpp Normal file
View File

@ -0,0 +1,44 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b,
integer *ldb, integer *info)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
extern int dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen),
dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *,
integer *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
*info = 0;
if (*n < 0) {
*info = -1;
} else if (*nrhs < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
} else if (*ldb < max(1, *n)) {
*info = -7;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGESV ", &i__1, (ftnlen)6);
return 0;
}
dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
if (*info == 0) {
dgetrs_((char *)"No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info,
(ftnlen)12);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,176 +0,0 @@
*> \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b>
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGESV + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGESV computes the solution to a real system of linear equations
*> A * X = B,
*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
*>
*> The LU decomposition with partial pivoting and row interchanges is
*> used to factor A as
*> A = P * L * U,
*> where P is a permutation matrix, L is unit lower triangular, and U is
*> upper triangular. The factored form of A is then used to solve the
*> system of equations A * X = B.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of linear equations, i.e., the order of the
*> matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrix B. NRHS >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the N-by-N coefficient matrix A.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices that define the permutation matrix P;
*> row i of the matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the N-by-NRHS matrix of right hand side matrix B.
*> On exit, if INFO = 0, the N-by-NRHS solution matrix X.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,N).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, so the solution could not be computed.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*
* =====================================================================
SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* -- LAPACK driver routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. External Subroutines ..
EXTERNAL DGETRF, DGETRS, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( NRHS.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGESV ', -INFO )
RETURN
END IF
*
* Compute the LU factorization of A.
*
CALL DGETRF( N, N, A, LDA, IPIV, INFO )
IF( INFO.EQ.0 ) THEN
*
* Solve the system A*X = B, overwriting B with X.
*
CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
$ INFO )
END IF
RETURN
*
* End of DGESV
*
END

1906
lib/linalg/dgesvd.cpp Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

76
lib/linalg/dgetf2.cpp Normal file
View File

@ -0,0 +1,76 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b8 = -1.;
int dgetf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
doublereal d__1;
integer i__, j, jp;
extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, integer *),
dscal_(integer *, doublereal *, doublereal *, integer *);
doublereal sfmin;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETF2", &i__1, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
sfmin = dlamch_((char *)"S", (ftnlen)1);
i__1 = min(*m, *n);
for (j = 1; j <= i__1; ++j) {
i__2 = *m - j + 1;
jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
ipiv[j] = jp;
if (a[jp + j * a_dim1] != 0.) {
if (jp != j) {
dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
}
if (j < *m) {
if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) {
i__2 = *m - j;
d__1 = 1. / a[j + j * a_dim1];
dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
} else {
i__2 = *m - j;
for (i__ = 1; i__ <= i__2; ++i__) {
a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
}
}
}
} else if (*info == 0) {
*info = j;
}
if (j < min(*m, *n)) {
i__2 = *m - j;
i__3 = *n - j;
dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (j + 1) * a_dim1], lda,
&a[j + 1 + (j + 1) * a_dim1], lda);
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,210 +0,0 @@
*> \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm).
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETF2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETF2 computes an LU factorization of a general m-by-n matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the right-looking Level 2 BLAS version of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION SFMIN
INTEGER I, J, JP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
INTEGER IDAMAX
EXTERNAL DLAMCH, IDAMAX
* ..
* .. External Subroutines ..
EXTERNAL DGER, DSCAL, DSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETF2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Compute machine safe minimum
*
SFMIN = DLAMCH('S')
*
DO 10 J = 1, MIN( M, N )
*
* Find pivot and test for singularity.
*
JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
IPIV( J ) = JP
IF( A( JP, J ).NE.ZERO ) THEN
*
* Apply the interchange to columns 1:N.
*
IF( JP.NE.J )
$ CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
*
* Compute elements J+1:M of J-th column.
*
IF( J.LT.M ) THEN
IF( ABS(A( J, J )) .GE. SFMIN ) THEN
CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
ELSE
DO 20 I = 1, M-J
A( J+I, J ) = A( J+I, J ) / A( J, J )
20 CONTINUE
END IF
END IF
*
ELSE IF( INFO.EQ.0 ) THEN
*
INFO = J
END IF
*
IF( J.LT.MIN( M, N ) ) THEN
*
* Update trailing submatrix.
*
CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
$ A( J+1, J+1 ), LDA )
END IF
10 CONTINUE
RETURN
*
* End of DGETF2
*
END

90
lib/linalg/dgetrf.cpp Normal file
View File

@ -0,0 +1,90 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static doublereal c_b16 = 1.;
static doublereal c_b19 = -1.;
int dgetrf_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
integer i__, j, jb, nb;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iinfo;
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
extern int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *,
integer *),
dgetrf2_(integer *, integer *, doublereal *, integer *, integer *, integer *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRF", &i__1, (ftnlen)6);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
nb = ilaenv_(&c__1, (char *)"DGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
if (nb <= 1 || nb >= min(*m, *n)) {
dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
} else {
i__1 = min(*m, *n);
i__2 = nb;
for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
i__3 = min(*m, *n) - j + 1;
jb = min(i__3, nb);
i__3 = *m - j + 1;
dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo + j - 1;
}
i__4 = *m, i__5 = j + jb - 1;
i__3 = min(i__4, i__5);
for (i__ = j; i__ <= i__3; ++i__) {
ipiv[i__] = j - 1 + ipiv[i__];
}
i__3 = j - 1;
i__4 = j + jb - 1;
dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
if (j + jb <= *n) {
i__3 = *n - j - jb + 1;
i__4 = j + jb - 1;
dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1);
i__3 = *n - j - jb + 1;
dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b16,
&a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4,
(ftnlen)5, (ftnlen)12, (ftnlen)4);
if (j + jb <= *m) {
i__3 = *m - j - jb + 1;
i__4 = *n - j - jb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &c_b19,
&a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16,
&a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12);
}
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,222 +0,0 @@
*> \brief \b DGETRF
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRF computes an LU factorization of a general M-by-N matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the right-looking Level 3 BLAS version of the algorithm.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, IINFO, J, JB, NB
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRF', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Determine the block size for this environment.
*
NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
*
* Use unblocked code.
*
CALL DGETRF2( M, N, A, LDA, IPIV, INFO )
ELSE
*
* Use blocked code.
*
DO 20 J = 1, MIN( M, N ), NB
JB = MIN( MIN( M, N )-J+1, NB )
*
* Factor diagonal and subdiagonal blocks and test for exact
* singularity.
*
CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
*
* Adjust INFO and the pivot indices.
*
IF( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO + J - 1
DO 10 I = J, MIN( M, J+JB-1 )
IPIV( I ) = J - 1 + IPIV( I )
10 CONTINUE
*
* Apply interchanges to columns 1:J-1.
*
CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
*
IF( J+JB.LE.N ) THEN
*
* Apply interchanges to columns J+JB:N.
*
CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
$ IPIV, 1 )
*
* Compute block row of U.
*
CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
$ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
$ LDA )
IF( J+JB.LE.M ) THEN
*
* Update trailing submatrix.
*
CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
$ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
$ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
$ LDA )
END IF
END IF
20 CONTINUE
END IF
RETURN
*
* End of DGETRF
*
END

106
lib/linalg/dgetrf2.cpp Normal file
View File

@ -0,0 +1,106 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b13 = 1.;
static doublereal c_b16 = -1.;
int dgetrf2_(integer *m, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info)
{
integer a_dim1, a_offset, i__1, i__2;
doublereal d__1;
integer i__, n1, n2;
doublereal temp;
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen,
ftnlen);
integer iinfo;
doublereal sfmin;
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen);
extern doublereal dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int xerbla_(char *, integer *, ftnlen),
dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *m)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRF2", &i__1, (ftnlen)7);
return 0;
}
if (*m == 0 || *n == 0) {
return 0;
}
if (*m == 1) {
ipiv[1] = 1;
if (a[a_dim1 + 1] == 0.) {
*info = 1;
}
} else if (*n == 1) {
sfmin = dlamch_((char *)"S", (ftnlen)1);
i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
ipiv[1] = i__;
if (a[i__ + a_dim1] != 0.) {
if (i__ != 1) {
temp = a[a_dim1 + 1];
a[a_dim1 + 1] = a[i__ + a_dim1];
a[i__ + a_dim1] = temp;
}
if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
i__1 = *m - 1;
d__1 = 1. / a[a_dim1 + 1];
dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
} else {
i__1 = *m - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
}
}
} else {
*info = 1;
}
} else {
n1 = min(*m, *n) / 2;
n2 = *n - n1;
dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo;
}
dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1);
dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1],
lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1);
i__1 = *m - n1;
dgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda,
&a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * a_dim1], lda,
(ftnlen)1, (ftnlen)1);
i__1 = *m - n1;
dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo);
if (*info == 0 && iinfo > 0) {
*info = iinfo + n1;
}
i__1 = min(*m, *n);
for (i__ = n1 + 1; i__ <= i__1; ++i__) {
ipiv[i__] += n1;
}
i__1 = n1 + 1;
i__2 = min(*m, *n);
dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,269 +0,0 @@
*> \brief \b DGETRF2
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRF2 computes an LU factorization of a general M-by-N matrix A
*> using partial pivoting with row interchanges.
*>
*> The factorization has the form
*> A = P * L * U
*> where P is a permutation matrix, L is lower triangular with unit
*> diagonal elements (lower trapezoidal if m > n), and U is upper
*> triangular (upper trapezoidal if m < n).
*>
*> This is the recursive version of the algorithm. It divides
*> the matrix into four submatrices:
*>
*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2
*> A = [ -----|----- ] with n1 = min(m,n)/2
*> [ A21 | A22 ] n2 = n-n1
*>
*> [ A11 ]
*> The subroutine calls itself to factor [ --- ],
*> [ A12 ]
*> [ A12 ]
*> do the swaps on [ --- ], solve A12, update A22,
*> [ A22 ]
*>
*> then calls itself to factor A22 and do the swaps on A21.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the M-by-N matrix to be factored.
*> On exit, the factors L and U from the factorization
*> A = P*L*U; the unit diagonal elements of L are not stored.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (min(M,N))
*> The pivot indices; for 1 <= i <= min(M,N), row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
*> has been completed, but the factor U is exactly
*> singular, and division by zero will occur if it is used
*> to solve a system of equations.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION SFMIN, TEMP
INTEGER I, IINFO, N1, N2
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
INTEGER IDAMAX
EXTERNAL DLAMCH, IDAMAX
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DSCAL, DLASWP, DTRSM, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRF2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
IF ( M.EQ.1 ) THEN
*
* Use unblocked code for one row case
* Just need to handle IPIV and INFO
*
IPIV( 1 ) = 1
IF ( A(1,1).EQ.ZERO )
$ INFO = 1
*
ELSE IF( N.EQ.1 ) THEN
*
* Use unblocked code for one column case
*
*
* Compute machine safe minimum
*
SFMIN = DLAMCH('S')
*
* Find pivot and test for singularity
*
I = IDAMAX( M, A( 1, 1 ), 1 )
IPIV( 1 ) = I
IF( A( I, 1 ).NE.ZERO ) THEN
*
* Apply the interchange
*
IF( I.NE.1 ) THEN
TEMP = A( 1, 1 )
A( 1, 1 ) = A( I, 1 )
A( I, 1 ) = TEMP
END IF
*
* Compute elements 2:M of the column
*
IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN
CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
ELSE
DO 10 I = 1, M-1
A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 )
10 CONTINUE
END IF
*
ELSE
INFO = 1
END IF
*
ELSE
*
* Use recursive code
*
N1 = MIN( M, N ) / 2
N2 = N-N1
*
* [ A11 ]
* Factor [ --- ]
* [ A21 ]
*
CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO )
IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO
*
* [ A12 ]
* Apply interchanges to [ --- ]
* [ A22 ]
*
CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 )
*
* Solve A12
*
CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
$ A( 1, N1+1 ), LDA )
*
* Update A22
*
CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
$ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
*
* Factor A22
*
CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ),
$ IINFO )
*
* Adjust INFO and the pivot indices
*
IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
$ INFO = IINFO + N1
DO 20 I = N1+1, MIN( M, N )
IPIV( I ) = IPIV( I ) + N1
20 CONTINUE
*
* Apply interchanges to A21
*
CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 )
*
END IF
RETURN
*
* End of DGETRF2
*
END

125
lib/linalg/dgetri.cpp Normal file
View File

@ -0,0 +1,125 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__2 = 2;
static doublereal c_b20 = -1.;
static doublereal c_b22 = 1.;
int dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work,
integer *lwork, integer *info)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__, j, jb, nb, jj, jp, nn, iws;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen),
dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, ftnlen);
integer nbmin;
extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *),
dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer ldwork;
extern int dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen,
ftnlen);
integer lwkopt;
logical lquery;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
--work;
*info = 0;
nb = ilaenv_(&c__1, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
lwkopt = *n * nb;
work[1] = (doublereal)lwkopt;
lquery = *lwork == -1;
if (*n < 0) {
*info = -1;
} else if (*lda < max(1, *n)) {
*info = -3;
} else if (*lwork < max(1, *n) && !lquery) {
*info = -6;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRI", &i__1, (ftnlen)6);
return 0;
} else if (lquery) {
return 0;
}
if (*n == 0) {
return 0;
}
dtrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8);
if (*info > 0) {
return 0;
}
nbmin = 2;
ldwork = *n;
if (nb > 1 && nb < *n) {
i__1 = ldwork * nb;
iws = max(i__1, 1);
if (*lwork < iws) {
nb = *lwork / ldwork;
i__1 = 2,
i__2 = ilaenv_(&c__2, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
nbmin = max(i__1, i__2);
}
} else {
iws = *n;
}
if (nb < nbmin || nb >= *n) {
for (j = *n; j >= 1; --j) {
i__1 = *n;
for (i__ = j + 1; i__ <= i__1; ++i__) {
work[i__] = a[i__ + j * a_dim1];
a[i__ + j * a_dim1] = 0.;
}
if (j < *n) {
i__1 = *n - j;
dgemv_((char *)"No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda,
&work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1, (ftnlen)12);
}
}
} else {
nn = (*n - 1) / nb * nb + 1;
i__1 = -nb;
for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
i__2 = nb, i__3 = *n - j + 1;
jb = min(i__2, i__3);
i__2 = j + jb - 1;
for (jj = j; jj <= i__2; ++jj) {
i__3 = *n;
for (i__ = jj + 1; i__ <= i__3; ++i__) {
work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
a[i__ + jj * a_dim1] = 0.;
}
}
if (j + jb <= *n) {
i__2 = *n - j - jb + 1;
dgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &c_b20,
&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b22,
&a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12);
}
dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b22, &work[j], &ldwork,
&a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4);
}
}
for (j = *n - 1; j >= 1; --j) {
jp = ipiv[j];
if (jp != j) {
dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
}
}
work[1] = (doublereal)iws;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,258 +0,0 @@
*> \brief \b DGETRI
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRI + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRI computes the inverse of a matrix using the LU factorization
*> computed by DGETRF.
*>
*> This method inverts U and then computes inv(A) by solving the system
*> inv(A)*L = inv(U) for inv(A).
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the factors L and U from the factorization
*> A = P*L*U as computed by DGETRF.
*> On exit, if INFO = 0, the inverse of the original matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices from DGETRF; for 1<=i<=N, row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> LWORK is INTEGER
*> The dimension of the array WORK. LWORK >= max(1,N).
*> For optimal performance LWORK >= N*NB, where NB is
*> the optimal blocksize returned by ILAENV.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
*> singular and its inverse could not be computed.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
LWKOPT = N*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from DTRTRI, then U is singular,
* and the inverse is not computed.
*
CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL DGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of DGETRI
*
END

65
lib/linalg/dgetrs.cpp Normal file
View File

@ -0,0 +1,65 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b12 = 1.;
static integer c_n1 = -1;
int dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv,
doublereal *b, integer *ldb, integer *info, ftnlen trans_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen),
xerbla_(char *, integer *, ftnlen),
dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *);
logical notran;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--ipiv;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
*info = 0;
notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1);
if (!notran && !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) &&
!lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1, *n)) {
*info = -5;
} else if (*ldb < max(1, *n)) {
*info = -8;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DGETRS", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0 || *nrhs == 0) {
return 0;
}
if (notran) {
dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)4);
dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)8);
} else {
dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)8);
dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, nrhs, &c_b12, &a[a_offset], lda,
&b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)9, (ftnlen)4);
dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,222 +0,0 @@
*> \brief \b DGETRS
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETRS + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* .. Scalar Arguments ..
* CHARACTER TRANS
* INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
* INTEGER IPIV( * )
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETRS solves a system of linear equations
*> A * X = B or A**T * X = B
*> with a general N-by-N matrix A using the LU factorization computed
*> by DGETRF.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the form of the system of equations:
*> = 'N': A * X = B (No transpose)
*> = 'T': A**T* X = B (Transpose)
*> = 'C': A**T* X = B (Conjugate transpose = Transpose)
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of right hand sides, i.e., the number of columns
*> of the matrix B. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The factors L and U from the factorization A = P*L*U
*> as computed by DGETRF.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,N).
*> \endverbatim
*>
*> \param[in] IPIV
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> The pivot indices from DGETRF; for 1<=i<=N, row i of the
*> matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
*> On entry, the right hand side matrix B.
*> On exit, the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,N).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEcomputational
*
* =====================================================================
SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER TRANS
INTEGER INFO, LDA, LDB, N, NRHS
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL NOTRAN
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL DLASWP, DTRSM, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NOTRAN = LSAME( TRANS, 'N' )
IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NRHS.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -5
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
INFO = -8
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRS', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 .OR. NRHS.EQ.0 )
$ RETURN
*
IF( NOTRAN ) THEN
*
* Solve A * X = B.
*
* Apply row interchanges to the right hand sides.
*
CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
*
* Solve L*X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
$ ONE, A, LDA, B, LDB )
*
* Solve U*X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
$ NRHS, ONE, A, LDA, B, LDB )
ELSE
*
* Solve A**T * X = B.
*
* Solve U**T *X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
$ ONE, A, LDA, B, LDB )
*
* Solve L**T *X = B, overwriting B with X.
*
CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
$ A, LDA, B, LDB )
*
* Apply row interchanges to the solution vectors.
*
CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
END IF
*
RETURN
*
* End of DGETRS
*
END

14
lib/linalg/disnan.cpp Normal file
View File

@ -0,0 +1,14 @@
#include <cmath>
extern "C" {
#include "lmp_f2c.h"
logical disnan_(const doublereal *din)
{
if (!din) return TRUE_;
return std::isnan(*din) ? TRUE_ : FALSE_;
}
}

View File

@ -1,77 +0,0 @@
*> \brief \b DISNAN tests input for NaN.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DISNAN + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* LOGICAL FUNCTION DISNAN( DIN )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION, INTENT(IN) :: DIN
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DISNAN returns .TRUE. if its argument is NaN, and .FALSE.
*> otherwise. To be replaced by the Fortran 2003 intrinsic in the
*> future.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] DIN
*> \verbatim
*> DIN is DOUBLE PRECISION
*> Input to test for NaN.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
LOGICAL FUNCTION DISNAN( DIN )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION, INTENT(IN) :: DIN
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL DLAISNAN
EXTERNAL DLAISNAN
* ..
* .. Executable Statements ..
DISNAN = DLAISNAN(DIN,DIN)
RETURN
END

16
lib/linalg/dlabad.cpp Normal file
View File

@ -0,0 +1,16 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlabad_(doublereal *small, doublereal *large)
{
double d_lmp_lg10(doublereal *), sqrt(doublereal);
if (d_lmp_lg10(large) > 2e3) {
*small = sqrt(*small);
*large = sqrt(*large);
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,102 +0,0 @@
*> \brief \b DLABAD
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLABAD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLABAD( SMALL, LARGE )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION LARGE, SMALL
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLABAD takes as input the values computed by DLAMCH for underflow and
*> overflow, and returns the square root of each of these values if the
*> log of LARGE is sufficiently large. This subroutine is intended to
*> identify machines with a large exponent range, such as the Crays, and
*> redefine the underflow and overflow limits to be the square roots of
*> the values computed by DLAMCH. This subroutine is needed because
*> DLAMCH does not compensate for poor arithmetic in the upper half of
*> the exponent range, as is found on a Cray.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in,out] SMALL
*> \verbatim
*> SMALL is DOUBLE PRECISION
*> On entry, the underflow threshold as computed by DLAMCH.
*> On exit, if LOG10(LARGE) is sufficiently large, the square
*> root of SMALL, otherwise unchanged.
*> \endverbatim
*>
*> \param[in,out] LARGE
*> \verbatim
*> LARGE is DOUBLE PRECISION
*> On entry, the overflow threshold as computed by DLAMCH.
*> On exit, if LOG10(LARGE) is sufficiently large, the square
*> root of LARGE, otherwise unchanged.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
SUBROUTINE DLABAD( SMALL, LARGE )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION LARGE, SMALL
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC LOG10, SQRT
* ..
* .. Executable Statements ..
*
* If it looks like we're on a Cray, take the square root of
* SMALL and LARGE to avoid overflow and underflow problems.
*
IF( LOG10( LARGE ).GT.2000.D0 ) THEN
SMALL = SQRT( SMALL )
LARGE = SQRT( LARGE )
END IF
*
RETURN
*
* End of DLABAD
*
END

210
lib/linalg/dlabrd.cpp Normal file
View File

@ -0,0 +1,210 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b4 = -1.;
static doublereal c_b5 = 1.;
static integer c__1 = 1;
static doublereal c_b16 = 0.;
int dlabrd_(integer *m, integer *n, integer *nb, doublereal *a, integer *lda, doublereal *d__,
doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx,
doublereal *y, integer *ldy)
{
integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3;
integer i__;
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, ftnlen),
dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--d__;
--e;
--tauq;
--taup;
x_dim1 = *ldx;
x_offset = 1 + x_dim1;
x -= x_offset;
y_dim1 = *ldy;
y_offset = 1 + y_dim1;
y -= y_offset;
if (*m <= 0 || *n <= 0) {
return 0;
}
if (*m >= *n) {
i__1 = *nb;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1],
ldy, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1],
&c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
if (i__ < *n) {
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__ + 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx,
&a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
i__2 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&a[i__ + a_dim1], lda, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&x[i__ + x_dim1], ldx, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, (ftnlen)9);
i__2 = *n - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
e[i__] = a[i__ + (i__ + 1) * a_dim1];
a[i__ + (i__ + 1) * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *m - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda,
&a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
}
}
} else {
i__1 = *nb;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1],
lda, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__ + 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1],
ldx, &c_b5, &a[i__ + i__ * a_dim1], lda, (ftnlen)9);
i__2 = *n - i__ + 1;
i__3 = i__ + 1;
dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n) * a_dim1], lda,
&taup[i__]);
d__[i__] = a[i__ + i__ * a_dim1];
if (i__ < *m) {
a[i__ + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__ + 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *n - i__ + 1;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1, (ftnlen)9);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = i__ - 1;
i__3 = *n - i__ + 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda,
&a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[i__ + 1 + i__ * x_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda,
&y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1, (ftnlen)12);
i__2 = *m - i__;
dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx,
&a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
i__3 = i__ + 2;
dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m) + i__ * a_dim1], &c__1,
&tauq[i__]);
e[i__] = a[i__ + 1 + i__ * a_dim1];
a[i__ + 1 + i__ * a_dim1] = 1.;
i__2 = *m - i__;
i__3 = *n - i__;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *m - i__;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
i__3 = i__ - 1;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)12);
i__2 = *m - i__;
dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx,
&a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dgemv_((char *)"Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda,
&y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1,
(ftnlen)9);
i__2 = *n - i__;
dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,378 +0,0 @@
*> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLABRD + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
* LDY )
*
* .. Scalar Arguments ..
* INTEGER LDA, LDX, LDY, M, N, NB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
* $ TAUQ( * ), X( LDX, * ), Y( LDY, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLABRD reduces the first NB rows and columns of a real general
*> m by n matrix A to upper or lower bidiagonal form by an orthogonal
*> transformation Q**T * A * P, and returns the matrices X and Y which
*> are needed to apply the transformation to the unreduced part of A.
*>
*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
*> bidiagonal form.
*>
*> This is an auxiliary routine called by DGEBRD
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows in the matrix A.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns in the matrix A.
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The number of leading rows and columns of A to be reduced.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> On entry, the m by n general matrix to be reduced.
*> On exit, the first NB rows and columns of the matrix are
*> overwritten; the rest of the array is unchanged.
*> If m >= n, elements on and below the diagonal in the first NB
*> columns, with the array TAUQ, represent the orthogonal
*> matrix Q as a product of elementary reflectors; and
*> elements above the diagonal in the first NB rows, with the
*> array TAUP, represent the orthogonal matrix P as a product
*> of elementary reflectors.
*> If m < n, elements below the diagonal in the first NB
*> columns, with the array TAUQ, represent the orthogonal
*> matrix Q as a product of elementary reflectors, and
*> elements on and above the diagonal in the first NB rows,
*> with the array TAUP, represent the orthogonal matrix P as
*> a product of elementary reflectors.
*> See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (NB)
*> The diagonal elements of the first NB rows and columns of
*> the reduced matrix. D(i) = A(i,i).
*> \endverbatim
*>
*> \param[out] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (NB)
*> The off-diagonal elements of the first NB rows and columns of
*> the reduced matrix.
*> \endverbatim
*>
*> \param[out] TAUQ
*> \verbatim
*> TAUQ is DOUBLE PRECISION array, dimension (NB)
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix Q. See Further Details.
*> \endverbatim
*>
*> \param[out] TAUP
*> \verbatim
*> TAUP is DOUBLE PRECISION array, dimension (NB)
*> The scalar factors of the elementary reflectors which
*> represent the orthogonal matrix P. See Further Details.
*> \endverbatim
*>
*> \param[out] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension (LDX,NB)
*> The m-by-nb matrix X required to update the unreduced part
*> of A.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max(1,M).
*> \endverbatim
*>
*> \param[out] Y
*> \verbatim
*> Y is DOUBLE PRECISION array, dimension (LDY,NB)
*> The n-by-nb matrix Y required to update the unreduced part
*> of A.
*> \endverbatim
*>
*> \param[in] LDY
*> \verbatim
*> LDY is INTEGER
*> The leading dimension of the array Y. LDY >= max(1,N).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The matrices Q and P are represented as products of elementary
*> reflectors:
*>
*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
*>
*> Each H(i) and G(i) has the form:
*>
*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*>
*> where tauq and taup are real scalars, and v and u are real vectors.
*>
*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*>
*> The elements of the vectors v and u together form the m-by-nb matrix
*> V and the nb-by-n matrix U**T which are needed, with X and Y, to apply
*> the transformation to the unreduced part of the matrix, using a block
*> update of the form: A := A - V*Y**T - X*U**T.
*>
*> The contents of A on exit are illustrated by the following examples
*> with nb = 2:
*>
*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*>
*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
*> ( v1 v2 a a a ) ( v1 1 a a a a )
*> ( v1 v2 a a a ) ( v1 v2 a a a a )
*> ( v1 v2 a a a ) ( v1 v2 a a a a )
*> ( v1 v2 a a a )
*>
*> where a denotes an element of the original matrix which is unchanged,
*> vi denotes an element of the vector defining H(i), and ui an element
*> of the vector defining G(i).
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
$ LDY )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER LDA, LDX, LDY, M, N, NB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), X( LDX, * ), Y( LDY, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DGEMV, DLARFG, DSCAL
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LE.0 .OR. N.LE.0 )
$ RETURN
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, NB
*
* Update A(i:m,i)
*
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
IF( I.LT.N ) THEN
A( I, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
$ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
*
* Update A(i,i+1:n)
*
CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
$ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, NB
*
* Update A(i,i:n)
*
CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
$ X( I, 1 ), LDX, ONE, A( I, I ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
IF( I.LT.M ) THEN
A( I, I ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
$ A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
*
* Update A(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
$ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
END IF
20 CONTINUE
END IF
RETURN
*
* End of DLABRD
*
END

136
lib/linalg/dlacn2.cpp Normal file
View File

@ -0,0 +1,136 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
int dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase,
integer *isave)
{
integer i__1;
doublereal d__1;
integer i_lmp_dnnt(doublereal *);
integer i__;
doublereal xs, temp;
extern doublereal dasum_(integer *, doublereal *, integer *);
integer jlast;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
extern integer idamax_(integer *, doublereal *, integer *);
doublereal altsgn, estold;
--isave;
--isgn;
--x;
--v;
if (*kase == 0) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = 1. / (doublereal)(*n);
}
*kase = 1;
isave[1] = 1;
return 0;
}
switch (isave[1]) {
case 1:
goto L20;
case 2:
goto L40;
case 3:
goto L70;
case 4:
goto L110;
case 5:
goto L140;
}
L20:
if (*n == 1) {
v[1] = x[1];
*est = abs(v[1]);
goto L150;
}
*est = dasum_(n, &x[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
x[i__] = 1.;
} else {
x[i__] = -1.;
}
isgn[i__] = i_lmp_dnnt(&x[i__]);
}
*kase = 2;
isave[1] = 2;
return 0;
L40:
isave[2] = idamax_(n, &x[1], &c__1);
isave[3] = 2;
L50:
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = 0.;
}
x[isave[2]] = 1.;
*kase = 1;
isave[1] = 3;
return 0;
L70:
dcopy_(n, &x[1], &c__1, &v[1], &c__1);
estold = *est;
*est = dasum_(n, &v[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
xs = 1.;
} else {
xs = -1.;
}
if (i_lmp_dnnt(&xs) != isgn[i__]) {
goto L90;
}
}
goto L120;
L90:
if (*est <= estold) {
goto L120;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (x[i__] >= 0.) {
x[i__] = 1.;
} else {
x[i__] = -1.;
}
isgn[i__] = i_lmp_dnnt(&x[i__]);
}
*kase = 2;
isave[1] = 4;
return 0;
L110:
jlast = isave[2];
isave[2] = idamax_(n, &x[1], &c__1);
if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) {
++isave[3];
goto L50;
}
L120:
altsgn = 1.;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
x[i__] = altsgn * ((doublereal)(i__ - 1) / (doublereal)(*n - 1) + 1.);
altsgn = -altsgn;
}
*kase = 1;
isave[1] = 5;
return 0;
L140:
temp = dasum_(n, &x[1], &c__1) / (doublereal)(*n * 3) * 2.;
if (temp > *est) {
dcopy_(n, &x[1], &c__1, &v[1], &c__1);
*est = temp;
}
L150:
*kase = 0;
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,304 +0,0 @@
*> \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLACN2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
*
* .. Scalar Arguments ..
* INTEGER KASE, N
* DOUBLE PRECISION EST
* ..
* .. Array Arguments ..
* INTEGER ISGN( * ), ISAVE( 3 )
* DOUBLE PRECISION V( * ), X( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLACN2 estimates the 1-norm of a square, real matrix A.
*> Reverse communication is used for evaluating matrix-vector products.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix. N >= 1.
*> \endverbatim
*>
*> \param[out] V
*> \verbatim
*> V is DOUBLE PRECISION array, dimension (N)
*> On the final return, V = A*W, where EST = norm(V)/norm(W)
*> (W is not returned).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension (N)
*> On an intermediate return, X should be overwritten by
*> A * X, if KASE=1,
*> A**T * X, if KASE=2,
*> and DLACN2 must be re-called with all the other parameters
*> unchanged.
*> \endverbatim
*>
*> \param[out] ISGN
*> \verbatim
*> ISGN is INTEGER array, dimension (N)
*> \endverbatim
*>
*> \param[in,out] EST
*> \verbatim
*> EST is DOUBLE PRECISION
*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be
*> unchanged from the previous call to DLACN2.
*> On exit, EST is an estimate (a lower bound) for norm(A).
*> \endverbatim
*>
*> \param[in,out] KASE
*> \verbatim
*> KASE is INTEGER
*> On the initial call to DLACN2, KASE should be 0.
*> On an intermediate return, KASE will be 1 or 2, indicating
*> whether X should be overwritten by A * X or A**T * X.
*> On the final return from DLACN2, KASE will again be 0.
*> \endverbatim
*>
*> \param[in,out] ISAVE
*> \verbatim
*> ISAVE is INTEGER array, dimension (3)
*> ISAVE is used to save variables between calls to DLACN2
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Originally named SONEST, dated March 16, 1988.
*>
*> This is a thread safe version of DLACON, which uses the array ISAVE
*> in place of a SAVE statement, as follows:
*>
*> DLACON DLACN2
*> JUMP ISAVE(1)
*> J ISAVE(2)
*> ITER ISAVE(3)
*> \endverbatim
*
*> \par Contributors:
* ==================
*>
*> Nick Higham, University of Manchester
*
*> \par References:
* ================
*>
*> N.J. Higham, "FORTRAN codes for estimating the one-norm of
*> a real or complex matrix, with applications to condition estimation",
*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
*>
* =====================================================================
SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KASE, N
DOUBLE PRECISION EST
* ..
* .. Array Arguments ..
INTEGER ISGN( * ), ISAVE( 3 )
DOUBLE PRECISION V( * ), X( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER ITMAX
PARAMETER ( ITMAX = 5 )
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, JLAST
DOUBLE PRECISION ALTSGN, ESTOLD, TEMP, XS
* ..
* .. External Functions ..
INTEGER IDAMAX
DOUBLE PRECISION DASUM
EXTERNAL IDAMAX, DASUM
* ..
* .. External Subroutines ..
EXTERNAL DCOPY
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, NINT
* ..
* .. Executable Statements ..
*
IF( KASE.EQ.0 ) THEN
DO 10 I = 1, N
X( I ) = ONE / DBLE( N )
10 CONTINUE
KASE = 1
ISAVE( 1 ) = 1
RETURN
END IF
*
GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 )
*
* ................ ENTRY (ISAVE( 1 ) = 1)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X.
*
20 CONTINUE
IF( N.EQ.1 ) THEN
V( 1 ) = X( 1 )
EST = ABS( V( 1 ) )
* ... QUIT
GO TO 150
END IF
EST = DASUM( N, X, 1 )
*
DO 30 I = 1, N
IF( X(I).GE.ZERO ) THEN
X(I) = ONE
ELSE
X(I) = -ONE
END IF
ISGN( I ) = NINT( X( I ) )
30 CONTINUE
KASE = 2
ISAVE( 1 ) = 2
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 2)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
*
40 CONTINUE
ISAVE( 2 ) = IDAMAX( N, X, 1 )
ISAVE( 3 ) = 2
*
* MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
*
50 CONTINUE
DO 60 I = 1, N
X( I ) = ZERO
60 CONTINUE
X( ISAVE( 2 ) ) = ONE
KASE = 1
ISAVE( 1 ) = 3
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 3)
* X HAS BEEN OVERWRITTEN BY A*X.
*
70 CONTINUE
CALL DCOPY( N, X, 1, V, 1 )
ESTOLD = EST
EST = DASUM( N, V, 1 )
DO 80 I = 1, N
IF( X(I).GE.ZERO ) THEN
XS = ONE
ELSE
XS = -ONE
END IF
IF( NINT( XS ).NE.ISGN( I ) )
$ GO TO 90
80 CONTINUE
* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
GO TO 120
*
90 CONTINUE
* TEST FOR CYCLING.
IF( EST.LE.ESTOLD )
$ GO TO 120
*
DO 100 I = 1, N
IF( X(I).GE.ZERO ) THEN
X(I) = ONE
ELSE
X(I) = -ONE
END IF
ISGN( I ) = NINT( X( I ) )
100 CONTINUE
KASE = 2
ISAVE( 1 ) = 4
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 4)
* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
*
110 CONTINUE
JLAST = ISAVE( 2 )
ISAVE( 2 ) = IDAMAX( N, X, 1 )
IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND.
$ ( ISAVE( 3 ).LT.ITMAX ) ) THEN
ISAVE( 3 ) = ISAVE( 3 ) + 1
GO TO 50
END IF
*
* ITERATION COMPLETE. FINAL STAGE.
*
120 CONTINUE
ALTSGN = ONE
DO 130 I = 1, N
X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) )
ALTSGN = -ALTSGN
130 CONTINUE
KASE = 1
ISAVE( 1 ) = 5
RETURN
*
* ................ ENTRY (ISAVE( 1 ) = 5)
* X HAS BEEN OVERWRITTEN BY A*X.
*
140 CONTINUE
TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) )
IF( TEMP.GT.EST ) THEN
CALL DCOPY( N, X, 1, V, 1 )
EST = TEMP
END IF
*
150 CONTINUE
KASE = 0
RETURN
*
* End of DLACN2
*
END

46
lib/linalg/dlacpy.cpp Normal file
View File

@ -0,0 +1,46 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlacpy_(char *uplo, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b,
integer *ldb, ftnlen uplo_len)
{
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
integer i__, j;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1;
b -= b_offset;
if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = min(j, *m);
for (i__ = 1; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
} else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = j; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,153 +0,0 @@
*> \brief \b DLACPY copies all or part of one two-dimensional array to another.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLACPY + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER LDA, LDB, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLACPY copies all or part of a two-dimensional matrix A to another
*> matrix B.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies the part of the matrix A to be copied to B.
*> = 'U': Upper triangular part
*> = 'L': Lower triangular part
*> Otherwise: All of the matrix A
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The m by n matrix A. If UPLO = 'U', only the upper triangle
*> or trapezoid is accessed; if UPLO = 'L', only the lower
*> triangle or trapezoid is accessed.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,N)
*> On exit, B = A in the locations specified by UPLO.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
* =====================================================================
SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER LDA, LDB, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I, J
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
IF( LSAME( UPLO, 'U' ) ) THEN
DO 20 J = 1, N
DO 10 I = 1, MIN( J, M )
B( I, J ) = A( I, J )
10 CONTINUE
20 CONTINUE
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
DO 40 J = 1, N
DO 30 I = J, M
B( I, J ) = A( I, J )
30 CONTINUE
40 CONTINUE
ELSE
DO 60 J = 1, N
DO 50 I = 1, M
B( I, J ) = A( I, J )
50 CONTINUE
60 CONTINUE
END IF
RETURN
*
* End of DLACPY
*
END

88
lib/linalg/dladiv.cpp Normal file
View File

@ -0,0 +1,88 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
doublereal *q)
{
doublereal d__1, d__2;
doublereal s, aa, ab, bb, cc, cd, dd, be, un, ov, eps;
extern doublereal dlamch_(char *, ftnlen);
extern int dladiv1_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *);
aa = *a;
bb = *b;
cc = *c__;
dd = *d__;
d__1 = abs(*a), d__2 = abs(*b);
ab = max(d__1, d__2);
d__1 = abs(*c__), d__2 = abs(*d__);
cd = max(d__1, d__2);
s = 1.;
ov = dlamch_((char *)"Overflow threshold", (ftnlen)18);
un = dlamch_((char *)"Safe minimum", (ftnlen)12);
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
be = 2. / (eps * eps);
if (ab >= ov * .5) {
aa *= .5;
bb *= .5;
s *= 2.;
}
if (cd >= ov * .5) {
cc *= .5;
dd *= .5;
s *= .5;
}
if (ab <= un * 2. / eps) {
aa *= be;
bb *= be;
s /= be;
}
if (cd <= un * 2. / eps) {
cc *= be;
dd *= be;
s *= be;
}
if (abs(*d__) <= abs(*c__)) {
dladiv1_(&aa, &bb, &cc, &dd, p, q);
} else {
dladiv1_(&bb, &aa, &dd, &cc, p, q);
*q = -(*q);
}
*p *= s;
*q *= s;
return 0;
}
int dladiv1_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p,
doublereal *q)
{
doublereal r__, t;
extern doublereal dladiv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *);
r__ = *d__ / *c__;
t = 1. / (*c__ + *d__ * r__);
*p = dladiv2_(a, b, c__, d__, &r__, &t);
*a = -(*a);
*q = dladiv2_(b, a, c__, d__, &r__, &t);
return 0;
}
doublereal dladiv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *r__,
doublereal *t)
{
doublereal ret_val;
doublereal br;
if (*r__ != 0.) {
br = *b * *r__;
if (br != 0.) {
ret_val = (*a + br) * *t;
} else {
ret_val = *a * *t + *b * *t * *r__;
}
} else {
ret_val = (*a + *d__ * (*b / *c__)) * *t;
}
return ret_val;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,251 +0,0 @@
*> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLADIV + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLADIV( A, B, C, D, P, Q )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLADIV performs complex division in real arithmetic
*>
*> a + i*b
*> p + i*q = ---------
*> c + i*d
*>
*> The algorithm is due to Michael Baudin and Robert L. Smith
*> and can be found in the paper
*> "A Robust Complex Division in Scilab"
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] C
*> \verbatim
*> C is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION
*> The scalars a, b, c, and d in the above expression.
*> \endverbatim
*>
*> \param[out] P
*> \verbatim
*> P is DOUBLE PRECISION
*> \endverbatim
*>
*> \param[out] Q
*> \verbatim
*> Q is DOUBLE PRECISION
*> The scalars p and q in the above expression.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
* =====================================================================
SUBROUTINE DLADIV( A, B, C, D, P, Q )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION BS
PARAMETER ( BS = 2.0D0 )
DOUBLE PRECISION HALF
PARAMETER ( HALF = 0.5D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLADIV1
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
AA = A
BB = B
CC = C
DD = D
AB = MAX( ABS(A), ABS(B) )
CD = MAX( ABS(C), ABS(D) )
S = 1.0D0
OV = DLAMCH( 'Overflow threshold' )
UN = DLAMCH( 'Safe minimum' )
EPS = DLAMCH( 'Epsilon' )
BE = BS / (EPS*EPS)
IF( AB >= HALF*OV ) THEN
AA = HALF * AA
BB = HALF * BB
S = TWO * S
END IF
IF( CD >= HALF*OV ) THEN
CC = HALF * CC
DD = HALF * DD
S = HALF * S
END IF
IF( AB <= UN*BS/EPS ) THEN
AA = AA * BE
BB = BB * BE
S = S / BE
END IF
IF( CD <= UN*BS/EPS ) THEN
CC = CC * BE
DD = DD * BE
S = S * BE
END IF
IF( ABS( D ).LE.ABS( C ) ) THEN
CALL DLADIV1(AA, BB, CC, DD, P, Q)
ELSE
CALL DLADIV1(BB, AA, DD, CC, P, Q)
Q = -Q
END IF
P = P * S
Q = Q * S
*
RETURN
*
* End of DLADIV
*
END
*> \ingroup doubleOTHERauxiliary
SUBROUTINE DLADIV1( A, B, C, D, P, Q )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, P, Q
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION R, T
* ..
* .. External Functions ..
DOUBLE PRECISION DLADIV2
EXTERNAL DLADIV2
* ..
* .. Executable Statements ..
*
R = D / C
T = ONE / (C + D * R)
P = DLADIV2(A, B, C, D, R, T)
A = -A
Q = DLADIV2(B, A, C, D, R, T)
*
RETURN
*
* End of DLADIV1
*
END
*> \ingroup doubleOTHERauxiliary
DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, D, R, T
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
*
* .. Local Scalars ..
DOUBLE PRECISION BR
* ..
* .. Executable Statements ..
*
IF( R.NE.ZERO ) THEN
BR = B * R
IF( BR.NE.ZERO ) THEN
DLADIV2 = (A + BR) * T
ELSE
DLADIV2 = A * T + (B * T) * R
END IF
ELSE
DLADIV2 = (A + D * (B / C)) * T
END IF
*
RETURN
*
* End of DLADIV2
*
END

45
lib/linalg/dlae2.cpp Normal file
View File

@ -0,0 +1,45 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2)
{
doublereal d__1;
double sqrt(doublereal);
doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
sm = *a + *c__;
df = *a - *c__;
adf = abs(df);
tb = *b + *b;
ab = abs(tb);
if (abs(*a) > abs(*c__)) {
acmx = *a;
acmn = *c__;
} else {
acmx = *c__;
acmn = *a;
}
if (adf > ab) {
d__1 = ab / adf;
rt = adf * sqrt(d__1 * d__1 + 1.);
} else if (adf < ab) {
d__1 = adf / ab;
rt = ab * sqrt(d__1 * d__1 + 1.);
} else {
rt = ab * sqrt(2.);
}
if (sm < 0.) {
*rt1 = (sm - rt) * .5;
*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
} else if (sm > 0.) {
*rt1 = (sm + rt) * .5;
*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
} else {
*rt1 = rt * .5;
*rt2 = rt * -.5;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,182 +0,0 @@
*> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAE2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION A, B, C, RT1, RT2
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix
*> [ A B ]
*> [ B C ].
*> On return, RT1 is the eigenvalue of larger absolute value, and RT2
*> is the eigenvalue of smaller absolute value.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION
*> The (1,1) element of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION
*> The (1,2) and (2,1) elements of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[in] C
*> \verbatim
*> C is DOUBLE PRECISION
*> The (2,2) element of the 2-by-2 matrix.
*> \endverbatim
*>
*> \param[out] RT1
*> \verbatim
*> RT1 is DOUBLE PRECISION
*> The eigenvalue of larger absolute value.
*> \endverbatim
*>
*> \param[out] RT2
*> \verbatim
*> RT2 is DOUBLE PRECISION
*> The eigenvalue of smaller absolute value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> RT1 is accurate to a few ulps barring over/underflow.
*>
*> RT2 may be inaccurate if there is massive cancellation in the
*> determinant A*C-B*B; higher precision or correctly rounded or
*> correctly truncated arithmetic would be needed to compute RT2
*> accurately in all cases.
*>
*> Overflow is possible only if RT1 is within a factor of 5 of overflow.
*> Underflow is harmless if the input data is 0 or exceeds
*> underflow_threshold / macheps.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
DOUBLE PRECISION A, B, C, RT1, RT2
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION HALF
PARAMETER ( HALF = 0.5D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION AB, ACMN, ACMX, ADF, DF, RT, SM, TB
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, SQRT
* ..
* .. Executable Statements ..
*
* Compute the eigenvalues
*
SM = A + C
DF = A - C
ADF = ABS( DF )
TB = B + B
AB = ABS( TB )
IF( ABS( A ).GT.ABS( C ) ) THEN
ACMX = A
ACMN = C
ELSE
ACMX = C
ACMN = A
END IF
IF( ADF.GT.AB ) THEN
RT = ADF*SQRT( ONE+( AB / ADF )**2 )
ELSE IF( ADF.LT.AB ) THEN
RT = AB*SQRT( ONE+( ADF / AB )**2 )
ELSE
*
* Includes case AB=ADF=0
*
RT = AB*SQRT( TWO )
END IF
IF( SM.LT.ZERO ) THEN
RT1 = HALF*( SM-RT )
*
* Order of execution important.
* To get fully accurate smaller eigenvalue,
* next line needs to be executed in higher precision.
*
RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
ELSE IF( SM.GT.ZERO ) THEN
RT1 = HALF*( SM+RT )
*
* Order of execution important.
* To get fully accurate smaller eigenvalue,
* next line needs to be executed in higher precision.
*
RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
ELSE
*
* Includes case RT1 = RT2 = 0
*
RT1 = HALF*RT
RT2 = -HALF*RT
END IF
RETURN
*
* End of DLAE2
*
END

236
lib/linalg/dlaed0.cpp Normal file
View File

@ -0,0 +1,236 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__9 = 9;
static integer c__0 = 0;
static integer c__2 = 2;
static doublereal c_b23 = 1.;
static doublereal c_b24 = 0.;
static integer c__1 = 1;
int dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e,
doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work,
integer *iwork, integer *info)
{
integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
doublereal d__1;
double log(doublereal);
integer pow_lmp_ii(integer *, integer *);
integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
doublereal temp;
integer curr;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer iperm;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer indxq, iwrem;
extern int dlaed1_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *,
integer *, doublereal *, integer *, integer *);
integer iqptr;
extern int dlaed7_(integer *, integer *, integer *, integer *, integer *, integer *,
doublereal *, doublereal *, integer *, integer *, doublereal *, integer *,
doublereal *, integer *, integer *, integer *, integer *, integer *,
doublereal *, doublereal *, integer *, integer *);
integer tlvls;
extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen);
integer igivcl;
extern int xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
integer igivnm, submat, curprb, subpbs, igivpt;
extern int dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *,
doublereal *, integer *, ftnlen);
integer curlvl, matsiz, iprmpt, smlsiz;
--d__;
--e;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
qstore_dim1 = *ldqs;
qstore_offset = 1 + qstore_dim1;
qstore -= qstore_offset;
--work;
--iwork;
*info = 0;
if (*icompq < 0 || *icompq > 2) {
*info = -1;
} else if (*icompq == 1 && *qsiz < max(0, *n)) {
*info = -2;
} else if (*n < 0) {
*info = -3;
} else if (*ldq < max(1, *n)) {
*info = -7;
} else if (*ldqs < max(1, *n)) {
*info = -9;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED0", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
smlsiz = ilaenv_(&c__9, (char *)"DLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1);
iwork[1] = *n;
subpbs = 1;
tlvls = 0;
L10:
if (iwork[subpbs] > smlsiz) {
for (j = subpbs; j >= 1; --j) {
iwork[j * 2] = (iwork[j] + 1) / 2;
iwork[(j << 1) - 1] = iwork[j] / 2;
}
++tlvls;
subpbs <<= 1;
goto L10;
}
i__1 = subpbs;
for (j = 2; j <= i__1; ++j) {
iwork[j] += iwork[j - 1];
}
spm1 = subpbs - 1;
i__1 = spm1;
for (i__ = 1; i__ <= i__1; ++i__) {
submat = iwork[i__] + 1;
smm1 = submat - 1;
d__[smm1] -= (d__1 = e[smm1], abs(d__1));
d__[submat] -= (d__1 = e[smm1], abs(d__1));
}
indxq = (*n << 2) + 3;
if (*icompq != 2) {
temp = log((doublereal)(*n)) / log(2.);
lgn = (integer)temp;
if (pow_lmp_ii(&c__2, &lgn) < *n) {
++lgn;
}
if (pow_lmp_ii(&c__2, &lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
iperm = iprmpt + *n * lgn;
iqptr = iperm + *n * lgn;
igivpt = iqptr + *n + 2;
igivcl = igivpt + *n * lgn;
igivnm = 1;
iq = igivnm + (*n << 1) * lgn;
i__1 = *n;
iwrem = iq + i__1 * i__1 + 1;
i__1 = subpbs;
for (i__ = 0; i__ <= i__1; ++i__) {
iwork[iprmpt + i__] = 1;
iwork[igivpt + i__] = 1;
}
iwork[iqptr] = 1;
}
curr = 0;
i__1 = spm1;
for (i__ = 0; i__ <= i__1; ++i__) {
if (i__ == 0) {
submat = 1;
matsiz = iwork[1];
} else {
submat = iwork[i__] + 1;
matsiz = iwork[i__ + 1] - iwork[i__];
}
if (*icompq == 2) {
dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq,
&work[1], info, (ftnlen)1);
if (*info != 0) {
goto L130;
}
} else {
dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]],
&matsiz, &work[1], info, (ftnlen)1);
if (*info != 0) {
goto L130;
}
if (*icompq == 1) {
dgemm_((char *)"N", (char *)"N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * q_dim1 + 1], ldq,
&work[iq - 1 + iwork[iqptr + curr]], &matsiz, &c_b24,
&qstore[submat * qstore_dim1 + 1], ldqs, (ftnlen)1, (ftnlen)1);
}
i__2 = matsiz;
iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
++curr;
}
k = 1;
i__2 = iwork[i__ + 1];
for (j = submat; j <= i__2; ++j) {
iwork[indxq + j] = k;
++k;
}
}
curlvl = 1;
L80:
if (subpbs > 1) {
spm2 = subpbs - 2;
i__1 = spm2;
for (i__ = 0; i__ <= i__1; i__ += 2) {
if (i__ == 0) {
submat = 1;
matsiz = iwork[2];
msd2 = iwork[1];
curprb = 0;
} else {
submat = iwork[i__] + 1;
matsiz = iwork[i__ + 2] - iwork[i__];
msd2 = matsiz / 2;
++curprb;
}
if (*icompq == 2) {
dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq,
&iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &work[1],
&iwork[subpbs + 1], info);
} else {
dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[submat],
&qstore[submat * qstore_dim1 + 1], ldqs, &iwork[indxq + submat],
&e[submat + msd2 - 1], &msd2, &work[iq], &iwork[iqptr], &iwork[iprmpt],
&iwork[iperm], &iwork[igivpt], &iwork[igivcl], &work[igivnm], &work[iwrem],
&iwork[subpbs + 1], info);
}
if (*info != 0) {
goto L130;
}
iwork[i__ / 2 + 1] = iwork[i__ + 2];
}
subpbs /= 2;
++curlvl;
goto L80;
}
if (*icompq == 1) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1);
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
} else if (*icompq == 2) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
dlacpy_((char *)"A", n, n, &work[*n + 1], n, &q[q_offset], ldq, (ftnlen)1);
} else {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
j = iwork[indxq + i__];
work[i__] = d__[j];
}
dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
}
goto L140;
L130:
*info = submat * (*n + 1) + submat + matsiz - 1;
L140:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,431 +0,0 @@
*> \brief \b DLAED0 used by DSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED0 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
* WORK, IWORK, INFO )
*
* .. Scalar Arguments ..
* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
* ..
* .. Array Arguments ..
* INTEGER IWORK( * )
* DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
* $ WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED0 computes all eigenvalues and corresponding eigenvectors of a
*> symmetric tridiagonal matrix using the divide and conquer method.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] ICOMPQ
*> \verbatim
*> ICOMPQ is INTEGER
*> = 0: Compute eigenvalues only.
*> = 1: Compute eigenvectors of original dense symmetric matrix
*> also. On entry, Q contains the orthogonal matrix used
*> to reduce the original matrix to tridiagonal form.
*> = 2: Compute eigenvalues and eigenvectors of tridiagonal
*> matrix.
*> \endverbatim
*>
*> \param[in] QSIZ
*> \verbatim
*> QSIZ is INTEGER
*> The dimension of the orthogonal matrix used to reduce
*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the main diagonal of the tridiagonal matrix.
*> On exit, its eigenvalues.
*> \endverbatim
*>
*> \param[in] E
*> \verbatim
*> E is DOUBLE PRECISION array, dimension (N-1)
*> The off-diagonal elements of the tridiagonal matrix.
*> On exit, E has been destroyed.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
*> On entry, Q must contain an N-by-N orthogonal matrix.
*> If ICOMPQ = 0 Q is not referenced.
*> If ICOMPQ = 1 On entry, Q is a subset of the columns of the
*> orthogonal matrix used to reduce the full
*> matrix to tridiagonal form corresponding to
*> the subset of the full matrix which is being
*> decomposed at this time.
*> If ICOMPQ = 2 On entry, Q will be the identity matrix.
*> On exit, Q contains the eigenvectors of the
*> tridiagonal matrix.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. If eigenvectors are
*> desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
*> \endverbatim
*>
*> \param[out] QSTORE
*> \verbatim
*> QSTORE is DOUBLE PRECISION array, dimension (LDQS, N)
*> Referenced only when ICOMPQ = 1. Used to store parts of
*> the eigenvector matrix when the updating matrix multiplies
*> take place.
*> \endverbatim
*>
*> \param[in] LDQS
*> \verbatim
*> LDQS is INTEGER
*> The leading dimension of the array QSTORE. If ICOMPQ = 1,
*> then LDQS >= max(1,N). In any case, LDQS >= 1.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array,
*> If ICOMPQ = 0 or 1, the dimension of WORK must be at least
*> 1 + 3*N + 2*N*lg N + 3*N**2
*> ( lg( N ) = smallest integer k
*> such that 2^k >= N )
*> If ICOMPQ = 2, the dimension of WORK must be at least
*> 4*N + N**2.
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array,
*> If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
*> 6 + 6*N + 5*N*lg N.
*> ( lg( N ) = smallest integer k
*> such that 2^k >= N )
*> If ICOMPQ = 2, the dimension of IWORK must be at least
*> 3 + 5*N.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: The algorithm failed to compute an eigenvalue while
*> working on the submatrix lying in rows and columns
*> INFO/(N+1) through mod(INFO,N+1).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA
*
* =====================================================================
SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
$ WORK, IWORK, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
* ..
* .. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
$ WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO
PARAMETER ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 )
* ..
* .. Local Scalars ..
INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
$ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
$ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
$ SPM2, SUBMAT, SUBPBS, TLVLS
DOUBLE PRECISION TEMP
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR,
$ XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, INT, LOG, MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
INFO = -1
ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
INFO = -2
ELSE IF( N.LT.0 ) THEN
INFO = -3
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -7
ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
INFO = -9
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED0', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 )
*
* Determine the size and placement of the submatrices, and save in
* the leading elements of IWORK.
*
IWORK( 1 ) = N
SUBPBS = 1
TLVLS = 0
10 CONTINUE
IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
DO 20 J = SUBPBS, 1, -1
IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
IWORK( 2*J-1 ) = IWORK( J ) / 2
20 CONTINUE
TLVLS = TLVLS + 1
SUBPBS = 2*SUBPBS
GO TO 10
END IF
DO 30 J = 2, SUBPBS
IWORK( J ) = IWORK( J ) + IWORK( J-1 )
30 CONTINUE
*
* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
* using rank-1 modifications (cuts).
*
SPM1 = SUBPBS - 1
DO 40 I = 1, SPM1
SUBMAT = IWORK( I ) + 1
SMM1 = SUBMAT - 1
D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
40 CONTINUE
*
INDXQ = 4*N + 3
IF( ICOMPQ.NE.2 ) THEN
*
* Set up workspaces for eigenvalues only/accumulate new vectors
* routine
*
TEMP = LOG( DBLE( N ) ) / LOG( TWO )
LGN = INT( TEMP )
IF( 2**LGN.LT.N )
$ LGN = LGN + 1
IF( 2**LGN.LT.N )
$ LGN = LGN + 1
IPRMPT = INDXQ + N + 1
IPERM = IPRMPT + N*LGN
IQPTR = IPERM + N*LGN
IGIVPT = IQPTR + N + 2
IGIVCL = IGIVPT + N*LGN
*
IGIVNM = 1
IQ = IGIVNM + 2*N*LGN
IWREM = IQ + N**2 + 1
*
* Initialize pointers
*
DO 50 I = 0, SUBPBS
IWORK( IPRMPT+I ) = 1
IWORK( IGIVPT+I ) = 1
50 CONTINUE
IWORK( IQPTR ) = 1
END IF
*
* Solve each submatrix eigenproblem at the bottom of the divide and
* conquer tree.
*
CURR = 0
DO 70 I = 0, SPM1
IF( I.EQ.0 ) THEN
SUBMAT = 1
MATSIZ = IWORK( 1 )
ELSE
SUBMAT = IWORK( I ) + 1
MATSIZ = IWORK( I+1 ) - IWORK( I )
END IF
IF( ICOMPQ.EQ.2 ) THEN
CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
$ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
IF( INFO.NE.0 )
$ GO TO 130
ELSE
CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
$ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 130
IF( ICOMPQ.EQ.1 ) THEN
CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
$ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
$ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
$ LDQS )
END IF
IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
CURR = CURR + 1
END IF
K = 1
DO 60 J = SUBMAT, IWORK( I+1 )
IWORK( INDXQ+J ) = K
K = K + 1
60 CONTINUE
70 CONTINUE
*
* Successively merge eigensystems of adjacent submatrices
* into eigensystem for the corresponding larger matrix.
*
* while ( SUBPBS > 1 )
*
CURLVL = 1
80 CONTINUE
IF( SUBPBS.GT.1 ) THEN
SPM2 = SUBPBS - 2
DO 90 I = 0, SPM2, 2
IF( I.EQ.0 ) THEN
SUBMAT = 1
MATSIZ = IWORK( 2 )
MSD2 = IWORK( 1 )
CURPRB = 0
ELSE
SUBMAT = IWORK( I ) + 1
MATSIZ = IWORK( I+2 ) - IWORK( I )
MSD2 = MATSIZ / 2
CURPRB = CURPRB + 1
END IF
*
* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
* into an eigensystem of size MATSIZ.
* DLAED1 is used only for the full eigensystem of a tridiagonal
* matrix.
* DLAED7 handles the cases in which eigenvalues only or eigenvalues
* and eigenvectors of a full symmetric matrix (which was reduced to
* tridiagonal form) are desired.
*
IF( ICOMPQ.EQ.2 ) THEN
CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
$ LDQ, IWORK( INDXQ+SUBMAT ),
$ E( SUBMAT+MSD2-1 ), MSD2, WORK,
$ IWORK( SUBPBS+1 ), INFO )
ELSE
CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
$ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
$ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
$ MSD2, WORK( IQ ), IWORK( IQPTR ),
$ IWORK( IPRMPT ), IWORK( IPERM ),
$ IWORK( IGIVPT ), IWORK( IGIVCL ),
$ WORK( IGIVNM ), WORK( IWREM ),
$ IWORK( SUBPBS+1 ), INFO )
END IF
IF( INFO.NE.0 )
$ GO TO 130
IWORK( I / 2+1 ) = IWORK( I+2 )
90 CONTINUE
SUBPBS = SUBPBS / 2
CURLVL = CURLVL + 1
GO TO 80
END IF
*
* end while
*
* Re-merge the eigenvalues/vectors which were deflated at the final
* merge step.
*
IF( ICOMPQ.EQ.1 ) THEN
DO 100 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
100 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
ELSE IF( ICOMPQ.EQ.2 ) THEN
DO 110 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
110 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
ELSE
DO 120 I = 1, N
J = IWORK( INDXQ+I )
WORK( I ) = D( J )
120 CONTINUE
CALL DCOPY( N, WORK, 1, D, 1 )
END IF
GO TO 140
*
130 CONTINUE
INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
*
140 CONTINUE
RETURN
*
* End of DLAED0
*
END

90
lib/linalg/dlaed1.cpp Normal file
View File

@ -0,0 +1,90 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
int dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc;
extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
integer indxp;
extern int dlaed2_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *, integer *, integer *, integer *, integer *),
dlaed3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *,
doublereal *, integer *);
integer idlmda;
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen);
integer coltyp;
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--work;
--iwork;
*info = 0;
if (*n < 0) {
*info = -1;
} else if (*ldq < max(1, *n)) {
*info = -4;
} else {
i__1 = 1, i__2 = *n / 2;
if (min(i__1, i__2) > *cutpnt || *n / 2 < *cutpnt) {
*info = -7;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED1", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
iz = 1;
idlmda = iz + *n;
iw = idlmda + *n;
iq2 = iw + *n;
indx = 1;
indxc = indx + *n;
coltyp = indxc + *n;
indxp = coltyp + *n;
dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
zpp1 = *cutpnt + 1;
i__1 = *n - *cutpnt;
dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[iz], &work[idlmda],
&work[iw], &work[iq2], &iwork[indx], &iwork[indxc], &iwork[indxp], &iwork[coltyp],
info);
if (*info != 0) {
goto L20;
}
if (k != 0) {
is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt +
(iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2],
&iwork[indxc], &iwork[coltyp], &work[iw], &work[is], info);
if (*info != 0) {
goto L20;
}
n1 = k;
n2 = *n - k;
dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
} else {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indxq[i__] = i__;
}
}
L20:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,271 +0,0 @@
*> \brief \b DLAED1 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED1 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
* INFO )
*
* .. Scalar Arguments ..
* INTEGER CUTPNT, INFO, LDQ, N
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER INDXQ( * ), IWORK( * )
* DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED1 computes the updated eigensystem of a diagonal
*> matrix after modification by a rank-one symmetric matrix. This
*> routine is used only for the eigenproblem which requires all
*> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles
*> the case in which eigenvalues only or eigenvalues and eigenvectors
*> of a full symmetric matrix (which was reduced to tridiagonal form)
*> are desired.
*>
*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out)
*>
*> where Z = Q**T*u, u is a vector of length N with ones in the
*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
*>
*> The eigenvectors of the original matrix are stored in Q, and the
*> eigenvalues are in D. The algorithm consists of three stages:
*>
*> The first stage consists of deflating the size of the problem
*> when there are multiple eigenvalues or if there is a zero in
*> the Z vector. For each such occurrence the dimension of the
*> secular equation problem is reduced by one. This stage is
*> performed by the routine DLAED2.
*>
*> The second stage consists of calculating the updated
*> eigenvalues. This is done by finding the roots of the secular
*> equation via the routine DLAED4 (as called by DLAED3).
*> This routine also calculates the eigenvectors of the current
*> problem.
*>
*> The final stage consists of computing the updated eigenvectors
*> directly using the updated eigenvalues. The eigenvectors for
*> the current problem are multiplied with the eigenvectors from
*> the overall problem.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, the eigenvalues of the rank-1-perturbed matrix.
*> On exit, the eigenvalues of the repaired matrix.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ,N)
*> On entry, the eigenvectors of the rank-1-perturbed matrix.
*> On exit, the eigenvectors of the repaired tridiagonal matrix.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in,out] INDXQ
*> \verbatim
*> INDXQ is INTEGER array, dimension (N)
*> On entry, the permutation which separately sorts the two
*> subproblems in D into ascending order.
*> On exit, the permutation which will reintegrate the
*> subproblems back into sorted order,
*> i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The subdiagonal entry used to create the rank-1 modification.
*> \endverbatim
*>
*> \param[in] CUTPNT
*> \verbatim
*> CUTPNT is INTEGER
*> The location of the last eigenvalue in the leading sub-matrix.
*> min(1,N) <= CUTPNT <= N/2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (4*N + N**2)
*> \endverbatim
*>
*> \param[out] IWORK
*> \verbatim
*> IWORK is INTEGER array, dimension (4*N)
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: if INFO = 1, an eigenvalue did not converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
$ INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER CUTPNT, INFO, LDQ, N
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER INDXQ( * ), IWORK( * )
DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS,
$ IW, IZ, K, N1, N2, ZPP1
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -4
ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED1', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* The following values are integer pointers which indicate
* the portion of the workspace
* used by a particular array in DLAED2 and DLAED3.
*
IZ = 1
IDLMDA = IZ + N
IW = IDLMDA + N
IQ2 = IW + N
*
INDX = 1
INDXC = INDX + N
COLTYP = INDXC + N
INDXP = COLTYP + N
*
*
* Form the z-vector which consists of the last row of Q_1 and the
* first row of Q_2.
*
CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
ZPP1 = CUTPNT + 1
CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
*
* Deflate eigenvalues.
*
CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
$ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
$ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
$ IWORK( COLTYP ), INFO )
*
IF( INFO.NE.0 )
$ GO TO 20
*
* Solve Secular Equation.
*
IF( K.NE.0 ) THEN
IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
$ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
$ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
$ WORK( IW ), WORK( IS ), INFO )
IF( INFO.NE.0 )
$ GO TO 20
*
* Prepare the INDXQ sorting permutation.
*
N1 = K
N2 = N - K
CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
ELSE
DO 10 I = 1, N
INDXQ( I ) = I
10 CONTINUE
END IF
*
20 CONTINUE
RETURN
*
* End of DLAED1
*
END

263
lib/linalg/dlaed2.cpp Normal file
View File

@ -0,0 +1,263 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b3 = -1.;
static integer c__1 = 1;
int dlaed2_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w,
doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp,
integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
doublereal d__1, d__2, d__3, d__4;
double sqrt(doublereal);
doublereal c__;
integer i__, j;
doublereal s, t;
integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
doublereal eps, tau, tol;
integer psm[4], imax, jmax;
extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *,
doublereal *);
integer ctot[4];
extern int dscal_(integer *, doublereal *, doublereal *, integer *),
dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen);
extern integer idamax_(integer *, doublereal *, integer *);
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--z__;
--dlamda;
--w;
--q2;
--indx;
--indxc;
--indxp;
--coltyp;
*info = 0;
if (*n < 0) {
*info = -2;
} else if (*ldq < max(1, *n)) {
*info = -6;
} else {
i__1 = 1, i__2 = *n / 2;
if (min(i__1, i__2) > *n1 || *n / 2 < *n1) {
*info = -3;
}
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED2", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
n2 = *n - *n1;
n1p1 = *n1 + 1;
if (*rho < 0.) {
dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
}
t = 1. / sqrt(2.);
dscal_(n, &t, &z__[1], &c__1);
*rho = (d__1 = *rho * 2., abs(d__1));
i__1 = *n;
for (i__ = n1p1; i__ <= i__1; ++i__) {
indxq[i__] += *n1;
}
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dlamda[i__] = d__[indxq[i__]];
}
dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indx[i__] = indxq[indxc[i__]];
}
imax = idamax_(n, &z__[1], &c__1);
jmax = idamax_(n, &d__[1], &c__1);
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2));
tol = eps * 8. * max(d__3, d__4);
if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
*k = 0;
iq2 = 1;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
i__ = indx[j];
dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
dlamda[j] = d__[i__];
iq2 += *n;
}
dlacpy_((char *)"A", n, n, &q2[1], n, &q[q_offset], ldq, (ftnlen)1);
dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
goto L190;
}
i__1 = *n1;
for (i__ = 1; i__ <= i__1; ++i__) {
coltyp[i__] = 1;
}
i__1 = *n;
for (i__ = n1p1; i__ <= i__1; ++i__) {
coltyp[i__] = 3;
}
*k = 0;
k2 = *n + 1;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
nj = indx[j];
if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
--k2;
coltyp[nj] = 4;
indxp[k2] = nj;
if (j == *n) {
goto L100;
}
} else {
pj = nj;
goto L80;
}
}
L80:
++j;
nj = indx[j];
if (j > *n) {
goto L100;
}
if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
--k2;
coltyp[nj] = 4;
indxp[k2] = nj;
} else {
s = z__[pj];
c__ = z__[nj];
tau = dlapy2_(&c__, &s);
t = d__[nj] - d__[pj];
c__ /= tau;
s = -s / tau;
if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
z__[nj] = tau;
z__[pj] = 0.;
if (coltyp[nj] != coltyp[pj]) {
coltyp[nj] = 2;
}
coltyp[pj] = 4;
drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &c__, &s);
d__1 = c__;
d__2 = s;
t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
d__1 = s;
d__2 = c__;
d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
d__[pj] = t;
--k2;
i__ = 1;
L90:
if (k2 + i__ <= *n) {
if (d__[pj] < d__[indxp[k2 + i__]]) {
indxp[k2 + i__ - 1] = indxp[k2 + i__];
indxp[k2 + i__] = pj;
++i__;
goto L90;
} else {
indxp[k2 + i__ - 1] = pj;
}
} else {
indxp[k2 + i__ - 1] = pj;
}
pj = nj;
} else {
++(*k);
dlamda[*k] = d__[pj];
w[*k] = z__[pj];
indxp[*k] = pj;
pj = nj;
}
}
goto L80;
L100:
++(*k);
dlamda[*k] = d__[pj];
w[*k] = z__[pj];
indxp[*k] = pj;
for (j = 1; j <= 4; ++j) {
ctot[j - 1] = 0;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
ct = coltyp[j];
++ctot[ct - 1];
}
psm[0] = 1;
psm[1] = ctot[0] + 1;
psm[2] = psm[1] + ctot[1];
psm[3] = psm[2] + ctot[2];
*k = *n - ctot[3];
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
js = indxp[j];
ct = coltyp[js];
indx[psm[ct - 1]] = js;
indxc[psm[ct - 1]] = j;
++psm[ct - 1];
}
i__ = 1;
iq1 = 1;
iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
i__1 = ctot[0];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
z__[i__] = d__[js];
++i__;
iq1 += *n1;
}
i__1 = ctot[1];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
z__[i__] = d__[js];
++i__;
iq1 += *n1;
iq2 += n2;
}
i__1 = ctot[2];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
z__[i__] = d__[js];
++i__;
iq2 += n2;
}
iq1 = iq2;
i__1 = ctot[3];
for (j = 1; j <= i__1; ++j) {
js = indx[i__];
dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
iq2 += *n;
z__[i__] = d__[js];
++i__;
}
if (*k < *n) {
dlacpy_((char *)"A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq, (ftnlen)1);
i__1 = *n - *k;
dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
}
for (j = 1; j <= 4; ++j) {
coltyp[j] = ctot[j - 1];
}
L190:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,536 +0,0 @@
*> \brief \b DLAED2 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, K, LDQ, N, N1
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
* $ INDXQ( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* $ W( * ), Z( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED2 merges the two sets of eigenvalues together into a single
*> sorted set. Then it tries to deflate the size of the problem.
*> There are two ways in which deflation can occur: when two or more
*> eigenvalues are close together or if there is a tiny entry in the
*> Z vector. For each such occurrence the order of the related secular
*> equation problem is reduced by one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[out] K
*> \verbatim
*> K is INTEGER
*> The number of non-deflated eigenvalues, and the order of the
*> related secular equation. 0 <= K <=N.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The dimension of the symmetric tridiagonal matrix. N >= 0.
*> \endverbatim
*>
*> \param[in] N1
*> \verbatim
*> N1 is INTEGER
*> The location of the last eigenvalue in the leading sub-matrix.
*> min(1,N) <= N1 <= N/2.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> On entry, D contains the eigenvalues of the two submatrices to
*> be combined.
*> On exit, D contains the trailing (N-K) updated eigenvalues
*> (those which were deflated) sorted into increasing order.
*> \endverbatim
*>
*> \param[in,out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
*> On entry, Q contains the eigenvectors of two submatrices in
*> the two square blocks with corners at (1,1), (N1,N1)
*> and (N1+1, N1+1), (N,N).
*> On exit, Q contains the trailing (N-K) updated eigenvectors
*> (those which were deflated) in its last N-K columns.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in,out] INDXQ
*> \verbatim
*> INDXQ is INTEGER array, dimension (N)
*> The permutation which separately sorts the two sub-problems
*> in D into ascending order. Note that elements in the second
*> half of this permutation must first have N1 added to their
*> values. Destroyed on exit.
*> \endverbatim
*>
*> \param[in,out] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> On entry, the off-diagonal element associated with the rank-1
*> cut which originally split the two submatrices which are now
*> being recombined.
*> On exit, RHO has been modified to the value required by
*> DLAED3.
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (N)
*> On entry, Z contains the updating vector (the last
*> row of the first sub-eigenvector matrix and the first row of
*> the second sub-eigenvector matrix).
*> On exit, the contents of Z have been destroyed by the updating
*> process.
*> \endverbatim
*>
*> \param[out] DLAMDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (N)
*> A copy of the first K eigenvalues which will be used by
*> DLAED3 to form the secular equation.
*> \endverbatim
*>
*> \param[out] W
*> \verbatim
*> W is DOUBLE PRECISION array, dimension (N)
*> The first k values of the final deflation-altered z-vector
*> which will be passed to DLAED3.
*> \endverbatim
*>
*> \param[out] Q2
*> \verbatim
*> Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
*> A copy of the first K eigenvectors which will be used by
*> DLAED3 in a matrix multiply (DGEMM) to solve for the new
*> eigenvectors.
*> \endverbatim
*>
*> \param[out] INDX
*> \verbatim
*> INDX is INTEGER array, dimension (N)
*> The permutation used to sort the contents of DLAMDA into
*> ascending order.
*> \endverbatim
*>
*> \param[out] INDXC
*> \verbatim
*> INDXC is INTEGER array, dimension (N)
*> The permutation used to arrange the columns of the deflated
*> Q matrix into three groups: the first group contains non-zero
*> elements only at and above N1, the second contains
*> non-zero elements only below N1, and the third is dense.
*> \endverbatim
*>
*> \param[out] INDXP
*> \verbatim
*> INDXP is INTEGER array, dimension (N)
*> The permutation used to place deflated values of D at the end
*> of the array. INDXP(1:K) points to the nondeflated D-values
*> and INDXP(K+1:N) points to the deflated eigenvalues.
*> \endverbatim
*>
*> \param[out] COLTYP
*> \verbatim
*> COLTYP is INTEGER array, dimension (N)
*> During execution, a label which will indicate which of the
*> following types a column in the Q2 matrix is:
*> 1 : non-zero in the upper half only;
*> 2 : dense;
*> 3 : non-zero in the lower half only;
*> 4 : deflated.
*> On exit, COLTYP(i) is the number of columns of type i,
*> for i=1 to 4 only.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
$ Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, K, LDQ, N, N1
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
$ INDXQ( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
$ W( * ), Z( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT
PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
$ TWO = 2.0D0, EIGHT = 8.0D0 )
* ..
* .. Local Arrays ..
INTEGER CTOT( 4 ), PSM( 4 )
* ..
* .. Local Scalars ..
INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
$ N2, NJ, PJ
DOUBLE PRECISION C, EPS, S, T, TAU, TOL
* ..
* .. External Functions ..
INTEGER IDAMAX
DOUBLE PRECISION DLAMCH, DLAPY2
EXTERNAL IDAMAX, DLAMCH, DLAPY2
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -6
ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
INFO = -3
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED2', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
N2 = N - N1
N1P1 = N1 + 1
*
IF( RHO.LT.ZERO ) THEN
CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
END IF
*
* Normalize z so that norm(z) = 1. Since z is the concatenation of
* two normalized vectors, norm2(z) = sqrt(2).
*
T = ONE / SQRT( TWO )
CALL DSCAL( N, T, Z, 1 )
*
* RHO = ABS( norm(z)**2 * RHO )
*
RHO = ABS( TWO*RHO )
*
* Sort the eigenvalues into increasing order
*
DO 10 I = N1P1, N
INDXQ( I ) = INDXQ( I ) + N1
10 CONTINUE
*
* re-integrate the deflated parts from the last pass
*
DO 20 I = 1, N
DLAMDA( I ) = D( INDXQ( I ) )
20 CONTINUE
CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
DO 30 I = 1, N
INDX( I ) = INDXQ( INDXC( I ) )
30 CONTINUE
*
* Calculate the allowable deflation tolerance
*
IMAX = IDAMAX( N, Z, 1 )
JMAX = IDAMAX( N, D, 1 )
EPS = DLAMCH( 'Epsilon' )
TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
*
* If the rank-1 modifier is small enough, no more needs to be done
* except to reorganize Q so that its columns correspond with the
* elements in D.
*
IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
K = 0
IQ2 = 1
DO 40 J = 1, N
I = INDX( J )
CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
DLAMDA( J ) = D( I )
IQ2 = IQ2 + N
40 CONTINUE
CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
CALL DCOPY( N, DLAMDA, 1, D, 1 )
GO TO 190
END IF
*
* If there are multiple eigenvalues then the problem deflates. Here
* the number of equal eigenvalues are found. As each equal
* eigenvalue is found, an elementary reflector is computed to rotate
* the corresponding eigensubspace so that the corresponding
* components of Z are zero in this new basis.
*
DO 50 I = 1, N1
COLTYP( I ) = 1
50 CONTINUE
DO 60 I = N1P1, N
COLTYP( I ) = 3
60 CONTINUE
*
*
K = 0
K2 = N + 1
DO 70 J = 1, N
NJ = INDX( J )
IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
COLTYP( NJ ) = 4
INDXP( K2 ) = NJ
IF( J.EQ.N )
$ GO TO 100
ELSE
PJ = NJ
GO TO 80
END IF
70 CONTINUE
80 CONTINUE
J = J + 1
NJ = INDX( J )
IF( J.GT.N )
$ GO TO 100
IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
COLTYP( NJ ) = 4
INDXP( K2 ) = NJ
ELSE
*
* Check if eigenvalues are close enough to allow deflation.
*
S = Z( PJ )
C = Z( NJ )
*
* Find sqrt(a**2+b**2) without overflow or
* destructive underflow.
*
TAU = DLAPY2( C, S )
T = D( NJ ) - D( PJ )
C = C / TAU
S = -S / TAU
IF( ABS( T*C*S ).LE.TOL ) THEN
*
* Deflation is possible.
*
Z( NJ ) = TAU
Z( PJ ) = ZERO
IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
$ COLTYP( NJ ) = 2
COLTYP( PJ ) = 4
CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
T = D( PJ )*C**2 + D( NJ )*S**2
D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
D( PJ ) = T
K2 = K2 - 1
I = 1
90 CONTINUE
IF( K2+I.LE.N ) THEN
IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
INDXP( K2+I-1 ) = INDXP( K2+I )
INDXP( K2+I ) = PJ
I = I + 1
GO TO 90
ELSE
INDXP( K2+I-1 ) = PJ
END IF
ELSE
INDXP( K2+I-1 ) = PJ
END IF
PJ = NJ
ELSE
K = K + 1
DLAMDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
PJ = NJ
END IF
END IF
GO TO 80
100 CONTINUE
*
* Record the last eigenvalue.
*
K = K + 1
DLAMDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
*
* Count up the total number of the various types of columns, then
* form a permutation which positions the four column types into
* four uniform groups (although one or more of these groups may be
* empty).
*
DO 110 J = 1, 4
CTOT( J ) = 0
110 CONTINUE
DO 120 J = 1, N
CT = COLTYP( J )
CTOT( CT ) = CTOT( CT ) + 1
120 CONTINUE
*
* PSM(*) = Position in SubMatrix (of types 1 through 4)
*
PSM( 1 ) = 1
PSM( 2 ) = 1 + CTOT( 1 )
PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
K = N - CTOT( 4 )
*
* Fill out the INDXC array so that the permutation which it induces
* will place all type-1 columns first, all type-2 columns next,
* then all type-3's, and finally all type-4's.
*
DO 130 J = 1, N
JS = INDXP( J )
CT = COLTYP( JS )
INDX( PSM( CT ) ) = JS
INDXC( PSM( CT ) ) = J
PSM( CT ) = PSM( CT ) + 1
130 CONTINUE
*
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
* and Q2 respectively. The eigenvalues/vectors which were not
* deflated go into the first K slots of DLAMDA and Q2 respectively,
* while those which were deflated go into the last N - K slots.
*
I = 1
IQ1 = 1
IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
DO 140 J = 1, CTOT( 1 )
JS = INDX( I )
CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ1 = IQ1 + N1
140 CONTINUE
*
DO 150 J = 1, CTOT( 2 )
JS = INDX( I )
CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ1 = IQ1 + N1
IQ2 = IQ2 + N2
150 CONTINUE
*
DO 160 J = 1, CTOT( 3 )
JS = INDX( I )
CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
Z( I ) = D( JS )
I = I + 1
IQ2 = IQ2 + N2
160 CONTINUE
*
IQ1 = IQ2
DO 170 J = 1, CTOT( 4 )
JS = INDX( I )
CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
IQ2 = IQ2 + N
Z( I ) = D( JS )
I = I + 1
170 CONTINUE
*
* The deflated eigenvalues and their corresponding vectors go back
* into the last N - K slots of D and Q respectively.
*
IF( K.LT.N ) THEN
CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
$ Q( 1, K+1 ), LDQ )
CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
END IF
*
* Copy CTOT into COLTYP for referencing in DLAED3.
*
DO 180 J = 1, 4
COLTYP( J ) = CTOT( J )
180 CONTINUE
*
190 CONTINUE
RETURN
*
* End of DLAED2
*
END

138
lib/linalg/dlaed3.cpp Normal file
View File

@ -0,0 +1,138 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static doublereal c_b22 = 1.;
static doublereal c_b23 = 0.;
int dlaed3_(integer *k, integer *n, integer *n1, doublereal *d__, doublereal *q, integer *ldq,
doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot,
doublereal *w, doublereal *s, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
doublereal d__1;
double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *);
integer i__, j, n2, n12, ii, n23, iq2;
doublereal temp;
extern doublereal dnrm2_(integer *, doublereal *, integer *);
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen),
dcopy_(integer *, doublereal *, integer *, doublereal *, integer *),
dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *);
extern doublereal dlamc3_(doublereal *, doublereal *);
extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *,
integer *, ftnlen),
dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *,
ftnlen),
xerbla_(char *, integer *, ftnlen);
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--dlamda;
--q2;
--indx;
--ctot;
--w;
--s;
*info = 0;
if (*k < 0) {
*info = -1;
} else if (*n < *k) {
*info = -2;
} else if (*ldq < max(1, *n)) {
*info = -6;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED3", &i__1, (ftnlen)6);
return 0;
}
if (*k == 0) {
return 0;
}
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
}
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info);
if (*info != 0) {
goto L120;
}
}
if (*k == 1) {
goto L110;
}
if (*k == 2) {
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
w[1] = q[j * q_dim1 + 1];
w[2] = q[j * q_dim1 + 2];
ii = indx[1];
q[j * q_dim1 + 1] = w[ii];
ii = indx[2];
q[j * q_dim1 + 2] = w[ii];
}
goto L110;
}
dcopy_(k, &w[1], &c__1, &s[1], &c__1);
i__1 = *ldq + 1;
dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
i__2 = j - 1;
for (i__ = 1; i__ <= i__2; ++i__) {
w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
}
i__2 = *k;
for (i__ = j + 1; i__ <= i__2; ++i__) {
w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
}
}
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
d__1 = sqrt(-w[i__]);
w[i__] = d_lmp_sign(&d__1, &s[i__]);
}
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
i__2 = *k;
for (i__ = 1; i__ <= i__2; ++i__) {
s[i__] = w[i__] / q[i__ + j * q_dim1];
}
temp = dnrm2_(k, &s[1], &c__1);
i__2 = *k;
for (i__ = 1; i__ <= i__2; ++i__) {
ii = indx[i__];
q[i__ + j * q_dim1] = s[ii] / temp;
}
}
L110:
n2 = *n - *n1;
n12 = ctot[1] + ctot[2];
n23 = ctot[2] + ctot[3];
dlacpy_((char *)"A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23, (ftnlen)1);
iq2 = *n1 * n12 + 1;
if (n23 != 0) {
dgemm_((char *)"N", (char *)"N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &c_b23,
&q[*n1 + 1 + q_dim1], ldq, (ftnlen)1, (ftnlen)1);
} else {
dlaset_((char *)"A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1);
}
dlacpy_((char *)"A", &n12, k, &q[q_offset], ldq, &s[1], &n12, (ftnlen)1);
if (n12 != 0) {
dgemm_((char *)"N", (char *)"N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq,
(ftnlen)1, (ftnlen)1);
} else {
dlaset_((char *)"A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq, (ftnlen)1);
}
L120:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,350 +0,0 @@
*> \brief \b DLAED3 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED3 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
* CTOT, W, S, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, K, LDQ, N, N1
* DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
* INTEGER CTOT( * ), INDX( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* $ S( * ), W( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED3 finds the roots of the secular equation, as defined by the
*> values in D, W, and RHO, between 1 and K. It makes the
*> appropriate calls to DLAED4 and then updates the eigenvectors by
*> multiplying the matrix of eigenvectors of the pair of eigensystems
*> being combined by the matrix of eigenvectors of the K-by-K system
*> which is solved here.
*>
*> This code makes very mild assumptions about floating point
*> arithmetic. It will work on machines with a guard digit in
*> add/subtract, or on those binary machines without guard digits
*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
*> It could conceivably fail on hexadecimal or decimal machines
*> without guard digits, but we know of none.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number of terms in the rational function to be solved by
*> DLAED4. K >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of rows and columns in the Q matrix.
*> N >= K (deflation may result in N>K).
*> \endverbatim
*>
*> \param[in] N1
*> \verbatim
*> N1 is INTEGER
*> The location of the last eigenvalue in the leading submatrix.
*> min(1,N) <= N1 <= N/2.
*> \endverbatim
*>
*> \param[out] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> D(I) contains the updated eigenvalues for
*> 1 <= I <= K.
*> \endverbatim
*>
*> \param[out] Q
*> \verbatim
*> Q is DOUBLE PRECISION array, dimension (LDQ,N)
*> Initially the first K columns are used as workspace.
*> On output the columns 1 to K contain
*> the updated eigenvectors.
*> \endverbatim
*>
*> \param[in] LDQ
*> \verbatim
*> LDQ is INTEGER
*> The leading dimension of the array Q. LDQ >= max(1,N).
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The value of the parameter in the rank one update equation.
*> RHO >= 0 required.
*> \endverbatim
*>
*> \param[in,out] DLAMDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (K)
*> The first K elements of this array contain the old roots
*> of the deflated updating problem. These are the poles
*> of the secular equation. May be changed on output by
*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
*> Cray-2, or Cray C-90, as described above.
*> \endverbatim
*>
*> \param[in] Q2
*> \verbatim
*> Q2 is DOUBLE PRECISION array, dimension (LDQ2*N)
*> The first K columns of this matrix contain the non-deflated
*> eigenvectors for the split problem.
*> \endverbatim
*>
*> \param[in] INDX
*> \verbatim
*> INDX is INTEGER array, dimension (N)
*> The permutation used to arrange the columns of the deflated
*> Q matrix into three groups (see DLAED2).
*> The rows of the eigenvectors found by DLAED4 must be likewise
*> permuted before the matrix multiply can take place.
*> \endverbatim
*>
*> \param[in] CTOT
*> \verbatim
*> CTOT is INTEGER array, dimension (4)
*> A count of the total number of the various types of columns
*> in Q, as described in INDX. The fourth column type is any
*> column which has been deflated.
*> \endverbatim
*>
*> \param[in,out] W
*> \verbatim
*> W is DOUBLE PRECISION array, dimension (K)
*> The first K elements of this array contain the components
*> of the deflation-adjusted updating vector. Destroyed on
*> output.
*> \endverbatim
*>
*> \param[out] S
*> \verbatim
*> S is DOUBLE PRECISION array, dimension (N1 + 1)*K
*> Will contain the eigenvectors of the repaired matrix which
*> will be multiplied by the previously accumulated eigenvectors
*> to update the system.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> > 0: if INFO = 1, an eigenvalue did not converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Jeff Rutter, Computer Science Division, University of California
*> at Berkeley, USA \n
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
$ CTOT, W, S, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, K, LDQ, N, N1
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER CTOT( * ), INDX( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
$ S( * ), W( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 )
* ..
* .. Local Scalars ..
INTEGER I, II, IQ2, J, N12, N2, N23
DOUBLE PRECISION TEMP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMC3, DNRM2
EXTERNAL DLAMC3, DNRM2
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( K.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.K ) THEN
INFO = -2
ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLAED3', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( K.EQ.0 )
$ RETURN
*
* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
* be computed with high relative accuracy (barring over/underflow).
* This is a problem on machines without a guard digit in
* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
* which on any of these machines zeros out the bottommost
* bit of DLAMDA(I) if it is 1; this makes the subsequent
* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
* occurs. On binary machines with a guard digit (almost all
* machines) it does not change DLAMDA(I) at all. On hexadecimal
* and decimal machines with a guard digit, it slightly
* changes the bottommost bits of DLAMDA(I). It does not account
* for hexadecimal or decimal machines without guard digits
* (we know of none). We use a subroutine call to compute
* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
* this code.
*
DO 10 I = 1, K
DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
10 CONTINUE
*
DO 20 J = 1, K
CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
*
* If the zero finder fails, the computation is terminated.
*
IF( INFO.NE.0 )
$ GO TO 120
20 CONTINUE
*
IF( K.EQ.1 )
$ GO TO 110
IF( K.EQ.2 ) THEN
DO 30 J = 1, K
W( 1 ) = Q( 1, J )
W( 2 ) = Q( 2, J )
II = INDX( 1 )
Q( 1, J ) = W( II )
II = INDX( 2 )
Q( 2, J ) = W( II )
30 CONTINUE
GO TO 110
END IF
*
* Compute updated W.
*
CALL DCOPY( K, W, 1, S, 1 )
*
* Initialize W(I) = Q(I,I)
*
CALL DCOPY( K, Q, LDQ+1, W, 1 )
DO 60 J = 1, K
DO 40 I = 1, J - 1
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
40 CONTINUE
DO 50 I = J + 1, K
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
50 CONTINUE
60 CONTINUE
DO 70 I = 1, K
W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
70 CONTINUE
*
* Compute eigenvectors of the modified rank-1 modification.
*
DO 100 J = 1, K
DO 80 I = 1, K
S( I ) = W( I ) / Q( I, J )
80 CONTINUE
TEMP = DNRM2( K, S, 1 )
DO 90 I = 1, K
II = INDX( I )
Q( I, J ) = S( II ) / TEMP
90 CONTINUE
100 CONTINUE
*
* Compute the updated eigenvectors.
*
110 CONTINUE
*
N2 = N - N1
N12 = CTOT( 1 ) + CTOT( 2 )
N23 = CTOT( 2 ) + CTOT( 3 )
*
CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
IQ2 = N1*N12 + 1
IF( N23.NE.0 ) THEN
CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
$ ZERO, Q( N1+1, 1 ), LDQ )
ELSE
CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
END IF
*
CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
IF( N12.NE.0 ) THEN
CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
$ LDQ )
ELSE
CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
END IF
*
*
120 CONTINUE
RETURN
*
* End of DLAED3
*
END

571
lib/linalg/dlaed4.cpp Normal file
View File

@ -0,0 +1,571 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta,
doublereal *rho, doublereal *dlam, integer *info)
{
integer i__1;
doublereal d__1;
double sqrt(doublereal);
doublereal a, b, c__;
integer j;
doublereal w;
integer ii;
doublereal dw, zz[3];
integer ip1;
doublereal del, eta, phi, eps, tau, psi;
integer iim1, iip1;
doublereal dphi, dpsi;
integer iter;
doublereal temp, prew, temp1, dltlb, dltub, midpt;
integer niter;
logical swtch;
extern int dlaed5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *),
dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *);
logical swtch3;
extern doublereal dlamch_(char *, ftnlen);
logical orgati;
doublereal erretm, rhoinv;
--delta;
--z__;
--d__;
*info = 0;
if (*n == 1) {
*dlam = d__[1] + *rho * z__[1] * z__[1];
delta[1] = 1.;
return 0;
}
if (*n == 2) {
dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
return 0;
}
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
rhoinv = 1. / *rho;
if (*i__ == *n) {
ii = *n - 1;
niter = 1;
midpt = *rho / 2.;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - midpt;
}
psi = 0.;
i__1 = *n - 2;
for (j = 1; j <= i__1; ++j) {
psi += z__[j] * z__[j] / delta[j];
}
c__ = rhoinv + psi;
w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*n];
if (w <= 0.) {
temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) +
z__[*n] * z__[*n] / *rho;
if (c__ <= temp) {
tau = *rho;
} else {
del = d__[*n] - d__[*n - 1];
a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
b = z__[*n] * z__[*n] * del;
if (a < 0.) {
tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
} else {
tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
}
}
dltlb = midpt;
dltub = *rho;
} else {
del = d__[*n] - d__[*n - 1];
a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
b = z__[*n] * z__[*n] * del;
if (a < 0.) {
tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
} else {
tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
}
dltlb = 0.;
dltub = midpt;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - tau;
}
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
if (abs(w) <= eps * erretm) {
*dlam = d__[*i__] + tau;
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
++niter;
c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
b = delta[*n - 1] * delta[*n] * w;
if (c__ < 0.) {
c__ = abs(c__);
}
if (c__ == 0.) {
eta = -w / (dpsi + dphi);
} else if (a >= 0.) {
eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (w * eta > 0.) {
eta = -w / (dpsi + dphi);
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
iter = niter + 1;
for (niter = iter; niter <= 30; ++niter) {
if (abs(w) <= eps * erretm) {
*dlam = d__[*i__] + tau;
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi);
b = delta[*n - 1] * delta[*n] * w;
if (a >= 0.) {
eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (w * eta > 0.) {
eta = -w / (dpsi + dphi);
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = ii;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
temp = z__[*n] / delta[*n];
phi = z__[*n] * temp;
dphi = temp * temp;
erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi);
w = rhoinv + phi + psi;
}
*info = 1;
*dlam = d__[*i__] + tau;
goto L250;
} else {
niter = 1;
ip1 = *i__ + 1;
del = d__[ip1] - d__[*i__];
midpt = del / 2.;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - midpt;
}
psi = 0.;
i__1 = *i__ - 1;
for (j = 1; j <= i__1; ++j) {
psi += z__[j] * z__[j] / delta[j];
}
phi = 0.;
i__1 = *i__ + 2;
for (j = *n; j >= i__1; --j) {
phi += z__[j] * z__[j] / delta[j];
}
c__ = rhoinv + psi + phi;
w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1];
if (w > 0.) {
orgati = TRUE_;
a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
b = z__[*i__] * z__[*i__] * del;
if (a > 0.) {
tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
} else {
tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
}
dltlb = 0.;
dltub = midpt;
} else {
orgati = FALSE_;
a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
b = z__[ip1] * z__[ip1] * del;
if (a < 0.) {
tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(d__1))));
} else {
tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.);
}
dltlb = -midpt;
dltub = 0.;
}
if (orgati) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[*i__] - tau;
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] = d__[j] - d__[ip1] - tau;
}
}
if (orgati) {
ii = *i__;
} else {
ii = *i__ + 1;
}
iim1 = ii - 1;
iip1 = ii + 1;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
w = rhoinv + phi + psi;
swtch3 = FALSE_;
if (orgati) {
if (w < 0.) {
swtch3 = TRUE_;
}
} else {
if (w > 0.) {
swtch3 = TRUE_;
}
}
if (ii == 1 || ii == *n) {
swtch3 = FALSE_;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w += temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
if (abs(w) <= eps * erretm) {
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
++niter;
if (!swtch3) {
if (orgati) {
d__1 = z__[*i__] / delta[*i__];
c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
} else {
d__1 = z__[ip1] / delta[ip1];
c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
}
a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
b = delta[*i__] * delta[ip1] * w;
if (c__ == 0.) {
if (a == 0.) {
if (orgati) {
a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
} else {
a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
}
}
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
} else {
temp = rhoinv + psi + phi;
if (orgati) {
temp1 = z__[iim1] / delta[iim1];
temp1 *= temp1;
c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
zz[0] = z__[iim1] * z__[iim1];
zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
} else {
temp1 = z__[iip1] / delta[iip1];
temp1 *= temp1;
c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
zz[2] = z__[iip1] * z__[iip1];
}
zz[1] = z__[ii] * z__[ii];
dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
if (*info != 0) {
goto L250;
}
}
if (w * eta >= 0.) {
eta = -w / dw;
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
prew = w;
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w = rhoinv + phi + psi + temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
(d__1 = tau + eta, abs(d__1)) * dw;
swtch = FALSE_;
if (orgati) {
if (-w > abs(prew) / 10.) {
swtch = TRUE_;
}
} else {
if (w > abs(prew) / 10.) {
swtch = TRUE_;
}
}
tau += eta;
iter = niter + 1;
for (niter = iter; niter <= 30; ++niter) {
if (abs(w) <= eps * erretm) {
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
goto L250;
}
if (w <= 0.) {
dltlb = max(dltlb, tau);
} else {
dltub = min(dltub, tau);
}
if (!swtch3) {
if (!swtch) {
if (orgati) {
d__1 = z__[*i__] / delta[*i__];
c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1);
} else {
d__1 = z__[ip1] / delta[ip1];
c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1);
}
} else {
temp = z__[ii] / delta[ii];
if (orgati) {
dpsi += temp * temp;
} else {
dphi += temp * temp;
}
c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
}
a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw;
b = delta[*i__] * delta[ip1] * w;
if (c__ == 0.) {
if (a == 0.) {
if (!swtch) {
if (orgati) {
a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi);
} else {
a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi);
}
} else {
a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] * delta[ip1] * dphi;
}
}
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
} else {
temp = rhoinv + psi + phi;
if (swtch) {
c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
zz[0] = delta[iim1] * delta[iim1] * dpsi;
zz[2] = delta[iip1] * delta[iip1] * dphi;
} else {
if (orgati) {
temp1 = z__[iim1] / delta[iim1];
temp1 *= temp1;
c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1;
zz[0] = z__[iim1] * z__[iim1];
zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
} else {
temp1 = z__[iip1] / delta[iip1];
temp1 *= temp1;
c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1;
zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
zz[2] = z__[iip1] * z__[iip1];
}
}
dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
if (*info != 0) {
goto L250;
}
}
if (w * eta >= 0.) {
eta = -w / dw;
}
temp = tau + eta;
if (temp > dltub || temp < dltlb) {
if (w < 0.) {
eta = (dltub - tau) / 2.;
} else {
eta = (dltlb - tau) / 2.;
}
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
delta[j] -= eta;
}
tau += eta;
prew = w;
dpsi = 0.;
psi = 0.;
erretm = 0.;
i__1 = iim1;
for (j = 1; j <= i__1; ++j) {
temp = z__[j] / delta[j];
psi += z__[j] * temp;
dpsi += temp * temp;
erretm += psi;
}
erretm = abs(erretm);
dphi = 0.;
phi = 0.;
i__1 = iip1;
for (j = *n; j >= i__1; --j) {
temp = z__[j] / delta[j];
phi += z__[j] * temp;
dphi += temp * temp;
erretm += phi;
}
temp = z__[ii] / delta[ii];
dw = dpsi + dphi + temp * temp;
temp = z__[ii] * temp;
w = rhoinv + phi + psi + temp;
erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw;
if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
swtch = !swtch;
}
}
*info = 1;
if (orgati) {
*dlam = d__[*i__] + tau;
} else {
*dlam = d__[ip1] + tau;
}
}
L250:
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,917 +0,0 @@
*> \brief \b DLAED4 used by DSTEDC. Finds a single root of the secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED4 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
*
* .. Scalar Arguments ..
* INTEGER I, INFO, N
* DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( * ), DELTA( * ), Z( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> This subroutine computes the I-th updated eigenvalue of a symmetric
*> rank-one modification to a diagonal matrix whose elements are
*> given in the array d, and that
*>
*> D(i) < D(j) for i < j
*>
*> and that RHO > 0. This is arranged by the calling routine, and is
*> no loss in generality. The rank-one modified system is thus
*>
*> diag( D ) + RHO * Z * Z_transpose.
*>
*> where we assume the Euclidean norm of Z is 1.
*>
*> The method consists of approximating the rational functions in the
*> secular equation by simpler interpolating rational functions.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The length of all arrays.
*> \endverbatim
*>
*> \param[in] I
*> \verbatim
*> I is INTEGER
*> The index of the eigenvalue to be computed. 1 <= I <= N.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (N)
*> The original eigenvalues. It is assumed that they are in
*> order, D(I) < D(J) for I < J.
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (N)
*> The components of the updating vector.
*> \endverbatim
*>
*> \param[out] DELTA
*> \verbatim
*> DELTA is DOUBLE PRECISION array, dimension (N)
*> If N > 2, DELTA contains (D(j) - lambda_I) in its j-th
*> component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
*> for detail. The vector DELTA contains the information necessary
*> to construct the eigenvectors by DLAED3 and DLAED9.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The scalar in the symmetric updating formula.
*> \endverbatim
*>
*> \param[out] DLAM
*> \verbatim
*> DLAM is DOUBLE PRECISION
*> The computed lambda_I, the I-th updated eigenvalue.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> > 0: if INFO = 1, the updating process failed.
*> \endverbatim
*
*> \par Internal Parameters:
* =========================
*>
*> \verbatim
*> Logical variable ORGATI (origin-at-i?) is used for distinguishing
*> whether D(i) or D(i+1) is treated as the origin.
*>
*> ORGATI = .true. origin at i
*> ORGATI = .false. origin at i+1
*>
*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting
*> if we are working with THREE poles!
*>
*> MAXIT is the maximum number of iterations allowed for each
*> eigenvalue.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER I, INFO, N
DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( * ), DELTA( * ), Z( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER MAXIT
PARAMETER ( MAXIT = 30 )
DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0,
$ TEN = 10.0D0 )
* ..
* .. Local Scalars ..
LOGICAL ORGATI, SWTCH, SWTCH3
INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER
DOUBLE PRECISION A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
$ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
$ RHOINV, TAU, TEMP, TEMP1, W
* ..
* .. Local Arrays ..
DOUBLE PRECISION ZZ( 3 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLAED5, DLAED6
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Since this routine is called in an inner loop, we do no argument
* checking.
*
* Quick return for N=1 and 2.
*
INFO = 0
IF( N.EQ.1 ) THEN
*
* Presumably, I=1 upon entry
*
DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
DELTA( 1 ) = ONE
RETURN
END IF
IF( N.EQ.2 ) THEN
CALL DLAED5( I, D, Z, DELTA, RHO, DLAM )
RETURN
END IF
*
* Compute machine epsilon
*
EPS = DLAMCH( 'Epsilon' )
RHOINV = ONE / RHO
*
* The case I = N
*
IF( I.EQ.N ) THEN
*
* Initialize some basic variables
*
II = N - 1
NITER = 1
*
* Calculate initial guess
*
MIDPT = RHO / TWO
*
* If ||Z||_2 is not one, then TEMP should be set to
* RHO * ||Z||_2^2 / TWO
*
DO 10 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
10 CONTINUE
*
PSI = ZERO
DO 20 J = 1, N - 2
PSI = PSI + Z( J )*Z( J ) / DELTA( J )
20 CONTINUE
*
C = RHOINV + PSI
W = C + Z( II )*Z( II ) / DELTA( II ) +
$ Z( N )*Z( N ) / DELTA( N )
*
IF( W.LE.ZERO ) THEN
TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
$ Z( N )*Z( N ) / RHO
IF( C.LE.TEMP ) THEN
TAU = RHO
ELSE
DEL = D( N ) - D( N-1 )
A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
B = Z( N )*Z( N )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
ELSE
TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
END IF
END IF
*
* It can be proved that
* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
*
DLTLB = MIDPT
DLTUB = RHO
ELSE
DEL = D( N ) - D( N-1 )
A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
B = Z( N )*Z( N )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
ELSE
TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
END IF
*
* It can be proved that
* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
*
DLTLB = ZERO
DLTUB = MIDPT
END IF
*
DO 30 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - TAU
30 CONTINUE
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 40 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
40 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
DLAM = D( I ) + TAU
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
NITER = NITER + 1
C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
A = ( DELTA( N-1 )+DELTA( N ) )*W -
$ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
B = DELTA( N-1 )*DELTA( N )*W
IF( C.LT.ZERO )
$ C = ABS( C )
IF( C.EQ.ZERO ) THEN
* ETA = B/A
* ETA = RHO - TAU
* ETA = DLTUB - TAU
*
* Update proposed by Li, Ren-Cang:
ETA = -W / ( DPSI+DPHI )
ELSE IF( A.GE.ZERO ) THEN
ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GT.ZERO )
$ ETA = -W / ( DPSI+DPHI )
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
DO 50 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
50 CONTINUE
*
TAU = TAU + ETA
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 60 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
60 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
*
* Main loop to update the values of the array DELTA
*
ITER = NITER + 1
*
DO 90 NITER = ITER, MAXIT
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
DLAM = D( I ) + TAU
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
A = ( DELTA( N-1 )+DELTA( N ) )*W -
$ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
B = DELTA( N-1 )*DELTA( N )*W
IF( A.GE.ZERO ) THEN
ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GT.ZERO )
$ ETA = -W / ( DPSI+DPHI )
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
DO 70 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
70 CONTINUE
*
TAU = TAU + ETA
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 80 J = 1, II
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
80 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
TEMP = Z( N ) / DELTA( N )
PHI = Z( N )*TEMP
DPHI = TEMP*TEMP
ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
$ ABS( TAU )*( DPSI+DPHI )
*
W = RHOINV + PHI + PSI
90 CONTINUE
*
* Return with INFO = 1, NITER = MAXIT and not converged
*
INFO = 1
DLAM = D( I ) + TAU
GO TO 250
*
* End for the case I = N
*
ELSE
*
* The case for I < N
*
NITER = 1
IP1 = I + 1
*
* Calculate initial guess
*
DEL = D( IP1 ) - D( I )
MIDPT = DEL / TWO
DO 100 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
100 CONTINUE
*
PSI = ZERO
DO 110 J = 1, I - 1
PSI = PSI + Z( J )*Z( J ) / DELTA( J )
110 CONTINUE
*
PHI = ZERO
DO 120 J = N, I + 2, -1
PHI = PHI + Z( J )*Z( J ) / DELTA( J )
120 CONTINUE
C = RHOINV + PSI + PHI
W = C + Z( I )*Z( I ) / DELTA( I ) +
$ Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
*
IF( W.GT.ZERO ) THEN
*
* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
*
* We choose d(i) as origin.
*
ORGATI = .TRUE.
A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
B = Z( I )*Z( I )*DEL
IF( A.GT.ZERO ) THEN
TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
ELSE
TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
END IF
DLTLB = ZERO
DLTUB = MIDPT
ELSE
*
* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
*
* We choose d(i+1) as origin.
*
ORGATI = .FALSE.
A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
B = Z( IP1 )*Z( IP1 )*DEL
IF( A.LT.ZERO ) THEN
TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
ELSE
TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
END IF
DLTLB = -MIDPT
DLTUB = ZERO
END IF
*
IF( ORGATI ) THEN
DO 130 J = 1, N
DELTA( J ) = ( D( J )-D( I ) ) - TAU
130 CONTINUE
ELSE
DO 140 J = 1, N
DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
140 CONTINUE
END IF
IF( ORGATI ) THEN
II = I
ELSE
II = I + 1
END IF
IIM1 = II - 1
IIP1 = II + 1
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 150 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
150 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 160 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
160 CONTINUE
*
W = RHOINV + PHI + PSI
*
* W is the value of the secular function with
* its ii-th element removed.
*
SWTCH3 = .FALSE.
IF( ORGATI ) THEN
IF( W.LT.ZERO )
$ SWTCH3 = .TRUE.
ELSE
IF( W.GT.ZERO )
$ SWTCH3 = .TRUE.
END IF
IF( II.EQ.1 .OR. II.EQ.N )
$ SWTCH3 = .FALSE.
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = W + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU )*DW
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
NITER = NITER + 1
IF( .NOT.SWTCH3 ) THEN
IF( ORGATI ) THEN
C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
$ ( Z( I ) / DELTA( I ) )**2
ELSE
C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
$ ( Z( IP1 ) / DELTA( IP1 ) )**2
END IF
A = ( DELTA( I )+DELTA( IP1 ) )*W -
$ DELTA( I )*DELTA( IP1 )*DW
B = DELTA( I )*DELTA( IP1 )*W
IF( C.EQ.ZERO ) THEN
IF( A.EQ.ZERO ) THEN
IF( ORGATI ) THEN
A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
$ ( DPSI+DPHI )
ELSE
A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
$ ( DPSI+DPHI )
END IF
END IF
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
ELSE
*
* Interpolation using THREE most relevant poles
*
TEMP = RHOINV + PSI + PHI
IF( ORGATI ) THEN
TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
$ ( D( IIM1 )-D( IIP1 ) )*TEMP1
ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
$ ( ( DPSI-TEMP1 )+DPHI )
ELSE
TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
$ ( D( IIP1 )-D( IIM1 ) )*TEMP1
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
$ ( DPSI+( DPHI-TEMP1 ) )
ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
END IF
ZZ( 2 ) = Z( II )*Z( II )
CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 250
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GE.ZERO )
$ ETA = -W / DW
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
*
PREW = W
*
DO 180 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
180 CONTINUE
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 190 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
190 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 200 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
200 CONTINUE
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = RHOINV + PHI + PSI + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
*
SWTCH = .FALSE.
IF( ORGATI ) THEN
IF( -W.GT.ABS( PREW ) / TEN )
$ SWTCH = .TRUE.
ELSE
IF( W.GT.ABS( PREW ) / TEN )
$ SWTCH = .TRUE.
END IF
*
TAU = TAU + ETA
*
* Main loop to update the values of the array DELTA
*
ITER = NITER + 1
*
DO 240 NITER = ITER, MAXIT
*
* Test for convergence
*
IF( ABS( W ).LE.EPS*ERRETM ) THEN
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
GO TO 250
END IF
*
IF( W.LE.ZERO ) THEN
DLTLB = MAX( DLTLB, TAU )
ELSE
DLTUB = MIN( DLTUB, TAU )
END IF
*
* Calculate the new step
*
IF( .NOT.SWTCH3 ) THEN
IF( .NOT.SWTCH ) THEN
IF( ORGATI ) THEN
C = W - DELTA( IP1 )*DW -
$ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
ELSE
C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
$ ( Z( IP1 ) / DELTA( IP1 ) )**2
END IF
ELSE
TEMP = Z( II ) / DELTA( II )
IF( ORGATI ) THEN
DPSI = DPSI + TEMP*TEMP
ELSE
DPHI = DPHI + TEMP*TEMP
END IF
C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
END IF
A = ( DELTA( I )+DELTA( IP1 ) )*W -
$ DELTA( I )*DELTA( IP1 )*DW
B = DELTA( I )*DELTA( IP1 )*W
IF( C.EQ.ZERO ) THEN
IF( A.EQ.ZERO ) THEN
IF( .NOT.SWTCH ) THEN
IF( ORGATI ) THEN
A = Z( I )*Z( I ) + DELTA( IP1 )*
$ DELTA( IP1 )*( DPSI+DPHI )
ELSE
A = Z( IP1 )*Z( IP1 ) +
$ DELTA( I )*DELTA( I )*( DPSI+DPHI )
END IF
ELSE
A = DELTA( I )*DELTA( I )*DPSI +
$ DELTA( IP1 )*DELTA( IP1 )*DPHI
END IF
END IF
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
ELSE
*
* Interpolation using THREE most relevant poles
*
TEMP = RHOINV + PSI + PHI
IF( SWTCH ) THEN
C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
ELSE
IF( ORGATI ) THEN
TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
$ ( D( IIM1 )-D( IIP1 ) )*TEMP1
ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
$ ( ( DPSI-TEMP1 )+DPHI )
ELSE
TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
TEMP1 = TEMP1*TEMP1
C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
$ ( D( IIP1 )-D( IIM1 ) )*TEMP1
ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
$ ( DPSI+( DPHI-TEMP1 ) )
ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
END IF
END IF
CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
$ INFO )
IF( INFO.NE.0 )
$ GO TO 250
END IF
*
* Note, eta should be positive if w is negative, and
* eta should be negative otherwise. However,
* if for some reason caused by roundoff, eta*w > 0,
* we simply use one Newton step instead. This way
* will guarantee eta*w < 0.
*
IF( W*ETA.GE.ZERO )
$ ETA = -W / DW
TEMP = TAU + ETA
IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
IF( W.LT.ZERO ) THEN
ETA = ( DLTUB-TAU ) / TWO
ELSE
ETA = ( DLTLB-TAU ) / TWO
END IF
END IF
*
DO 210 J = 1, N
DELTA( J ) = DELTA( J ) - ETA
210 CONTINUE
*
TAU = TAU + ETA
PREW = W
*
* Evaluate PSI and the derivative DPSI
*
DPSI = ZERO
PSI = ZERO
ERRETM = ZERO
DO 220 J = 1, IIM1
TEMP = Z( J ) / DELTA( J )
PSI = PSI + Z( J )*TEMP
DPSI = DPSI + TEMP*TEMP
ERRETM = ERRETM + PSI
220 CONTINUE
ERRETM = ABS( ERRETM )
*
* Evaluate PHI and the derivative DPHI
*
DPHI = ZERO
PHI = ZERO
DO 230 J = N, IIP1, -1
TEMP = Z( J ) / DELTA( J )
PHI = PHI + Z( J )*TEMP
DPHI = DPHI + TEMP*TEMP
ERRETM = ERRETM + PHI
230 CONTINUE
*
TEMP = Z( II ) / DELTA( II )
DW = DPSI + DPHI + TEMP*TEMP
TEMP = Z( II )*TEMP
W = RHOINV + PHI + PSI + TEMP
ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
$ THREE*ABS( TEMP ) + ABS( TAU )*DW
IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
$ SWTCH = .NOT.SWTCH
*
240 CONTINUE
*
* Return with INFO = 1, NITER = MAXIT and not converged
*
INFO = 1
IF( ORGATI ) THEN
DLAM = D( I ) + TAU
ELSE
DLAM = D( IP1 ) + TAU
END IF
*
END IF
*
250 CONTINUE
*
RETURN
*
* End of DLAED4
*
END

58
lib/linalg/dlaed5.cpp Normal file
View File

@ -0,0 +1,58 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho,
doublereal *dlam)
{
doublereal d__1;
double sqrt(doublereal);
doublereal b, c__, w, del, tau, temp;
--delta;
--z__;
--d__;
del = d__[2] - d__[1];
if (*i__ == 1) {
w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.;
if (w > 0.) {
b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[1] * z__[1] * del;
tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
*dlam = d__[1] + tau;
delta[1] = -z__[1] / tau;
delta[2] = z__[2] / (del - tau);
} else {
b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[2] * z__[2] * del;
if (b > 0.) {
tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
} else {
tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
}
*dlam = d__[2] + tau;
delta[1] = -z__[1] / (del + tau);
delta[2] = -z__[2] / tau;
}
temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
delta[1] /= temp;
delta[2] /= temp;
} else {
b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
c__ = *rho * z__[2] * z__[2] * del;
if (b > 0.) {
tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
} else {
tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
}
*dlam = d__[2] + tau;
delta[1] = -z__[1] / (del + tau);
delta[2] = -z__[2] / tau;
temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
delta[1] /= temp;
delta[2] /= temp;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,186 +0,0 @@
*> \brief \b DLAED5 used by DSTEDC. Solves the 2-by-2 secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED5 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
*
* .. Scalar Arguments ..
* INTEGER I
* DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> This subroutine computes the I-th eigenvalue of a symmetric rank-one
*> modification of a 2-by-2 diagonal matrix
*>
*> diag( D ) + RHO * Z * transpose(Z) .
*>
*> The diagonal elements in the array D are assumed to satisfy
*>
*> D(i) < D(j) for i < j .
*>
*> We also assume RHO > 0 and that the Euclidean norm of the vector
*> Z is one.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] I
*> \verbatim
*> I is INTEGER
*> The index of the eigenvalue to be computed. I = 1 or I = 2.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (2)
*> The original eigenvalues. We assume D(1) < D(2).
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (2)
*> The components of the updating vector.
*> \endverbatim
*>
*> \param[out] DELTA
*> \verbatim
*> DELTA is DOUBLE PRECISION array, dimension (2)
*> The vector DELTA contains the information necessary
*> to construct the eigenvectors.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> The scalar in the symmetric updating formula.
*> \endverbatim
*>
*> \param[out] DLAM
*> \verbatim
*> DLAM is DOUBLE PRECISION
*> The computed lambda_I, the I-th updated eigenvalue.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER I
DOUBLE PRECISION DLAM, RHO
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE, TWO, FOUR
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ FOUR = 4.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION B, C, DEL, TAU, TEMP, W
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, SQRT
* ..
* .. Executable Statements ..
*
DEL = D( 2 ) - D( 1 )
IF( I.EQ.1 ) THEN
W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
IF( W.GT.ZERO ) THEN
B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 1 )*Z( 1 )*DEL
*
* B > ZERO, always
*
TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
DLAM = D( 1 ) + TAU
DELTA( 1 ) = -Z( 1 ) / TAU
DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
ELSE
B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 2 )*Z( 2 )*DEL
IF( B.GT.ZERO ) THEN
TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
ELSE
TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
END IF
DLAM = D( 2 ) + TAU
DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
DELTA( 2 ) = -Z( 2 ) / TAU
END IF
TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
DELTA( 1 ) = DELTA( 1 ) / TEMP
DELTA( 2 ) = DELTA( 2 ) / TEMP
ELSE
*
* Now I=2
*
B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
C = RHO*Z( 2 )*Z( 2 )*DEL
IF( B.GT.ZERO ) THEN
TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
ELSE
TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
END IF
DLAM = D( 2 ) + TAU
DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
DELTA( 2 ) = -Z( 2 ) / TAU
TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
DELTA( 1 ) = DELTA( 1 ) / TEMP
DELTA( 2 ) = DELTA( 2 ) / TEMP
END IF
RETURN
*
* End of DLAED5
*
END

209
lib/linalg/dlaed6.cpp Normal file
View File

@ -0,0 +1,209 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dlaed6_(integer *kniter, logical *orgati, doublereal *rho, doublereal *d__, doublereal *z__,
doublereal *finit, doublereal *tau, integer *info)
{
integer i__1;
doublereal d__1, d__2, d__3, d__4;
double sqrt(doublereal), log(doublereal), pow_lmp_di(doublereal *, integer *);
doublereal a, b, c__, f;
integer i__;
doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
integer iter;
doublereal temp, temp1, temp2, temp3, temp4;
logical scale;
integer niter;
doublereal small1, small2, sminv1, sminv2;
extern doublereal dlamch_(char *, ftnlen);
doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
--z__;
--d__;
*info = 0;
if (*orgati) {
lbd = d__[2];
ubd = d__[3];
} else {
lbd = d__[1];
ubd = d__[2];
}
if (*finit < 0.) {
lbd = 0.;
} else {
ubd = 0.;
}
niter = 1;
*tau = 0.;
if (*kniter == 2) {
if (*orgati) {
temp = (d__[3] - d__[2]) / 2.;
c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
} else {
temp = (d__[1] - d__[2]) / 2.;
c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
}
d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
temp = max(d__1, d__2);
a /= temp;
b /= temp;
c__ /= temp;
if (c__ == 0.) {
*tau = b / a;
} else if (a <= 0.) {
*tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
*tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (*tau < lbd || *tau > ubd) {
*tau = (lbd + ubd) / 2.;
}
if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
*tau = 0.;
} else {
temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) +
*tau * z__[2] / (d__[2] * (d__[2] - *tau)) +
*tau * z__[3] / (d__[3] * (d__[3] - *tau));
if (temp <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
if (abs(*finit) <= abs(temp)) {
*tau = 0.;
}
}
}
eps = dlamch_((char *)"Epsilon", (ftnlen)7);
base = dlamch_((char *)"Base", (ftnlen)4);
i__1 = (integer)(log(dlamch_((char *)"SafMin", (ftnlen)6)) / log(base) / 3.);
small1 = pow_lmp_di(&base, &i__1);
sminv1 = 1. / small1;
small2 = small1 * small1;
sminv2 = sminv1 * sminv1;
if (*orgati) {
d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *tau, abs(d__2));
temp = min(d__3, d__4);
} else {
d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *tau, abs(d__2));
temp = min(d__3, d__4);
}
scale = FALSE_;
if (temp <= small1) {
scale = TRUE_;
if (temp <= small2) {
sclfac = sminv2;
sclinv = small2;
} else {
sclfac = sminv1;
sclinv = small1;
}
for (i__ = 1; i__ <= 3; ++i__) {
dscale[i__ - 1] = d__[i__] * sclfac;
zscale[i__ - 1] = z__[i__] * sclfac;
}
*tau *= sclfac;
lbd *= sclfac;
ubd *= sclfac;
} else {
for (i__ = 1; i__ <= 3; ++i__) {
dscale[i__ - 1] = d__[i__];
zscale[i__ - 1] = z__[i__];
}
}
fc = 0.;
df = 0.;
ddf = 0.;
for (i__ = 1; i__ <= 3; ++i__) {
temp = 1. / (dscale[i__ - 1] - *tau);
temp1 = zscale[i__ - 1] * temp;
temp2 = temp1 * temp;
temp3 = temp2 * temp;
fc += temp1 / dscale[i__ - 1];
df += temp2;
ddf += temp3;
}
f = *finit + *tau * fc;
if (abs(f) <= 0.) {
goto L60;
}
if (f <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
iter = niter + 1;
for (niter = iter; niter <= 40; ++niter) {
if (*orgati) {
temp1 = dscale[1] - *tau;
temp2 = dscale[2] - *tau;
} else {
temp1 = dscale[0] - *tau;
temp2 = dscale[1] - *tau;
}
a = (temp1 + temp2) * f - temp1 * temp2 * df;
b = temp1 * temp2 * f;
c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1, d__2), d__2 = abs(c__);
temp = max(d__1, d__2);
a /= temp;
b /= temp;
c__ /= temp;
if (c__ == 0.) {
eta = b / a;
} else if (a <= 0.) {
eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.);
} else {
eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))));
}
if (f * eta >= 0.) {
eta = -f / df;
}
*tau += eta;
if (*tau < lbd || *tau > ubd) {
*tau = (lbd + ubd) / 2.;
}
fc = 0.;
erretm = 0.;
df = 0.;
ddf = 0.;
for (i__ = 1; i__ <= 3; ++i__) {
if (dscale[i__ - 1] - *tau != 0.) {
temp = 1. / (dscale[i__ - 1] - *tau);
temp1 = zscale[i__ - 1] * temp;
temp2 = temp1 * temp;
temp3 = temp2 * temp;
temp4 = temp1 / dscale[i__ - 1];
fc += temp4;
erretm += abs(temp4);
df += temp2;
ddf += temp3;
} else {
goto L60;
}
}
f = *finit + *tau * fc;
erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
if (abs(f) <= eps * 4. * erretm || ubd - lbd <= eps * 4. * abs(*tau)) {
goto L60;
}
if (f <= 0.) {
lbd = *tau;
} else {
ubd = *tau;
}
}
*info = 1;
L60:
if (scale) {
*tau *= sclinv;
}
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,407 +0,0 @@
*> \brief \b DLAED6 used by DSTEDC. Computes one Newton step in solution of the secular equation.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLAED6 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
*
* .. Scalar Arguments ..
* LOGICAL ORGATI
* INTEGER INFO, KNITER
* DOUBLE PRECISION FINIT, RHO, TAU
* ..
* .. Array Arguments ..
* DOUBLE PRECISION D( 3 ), Z( 3 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLAED6 computes the positive or negative root (closest to the origin)
*> of
*> z(1) z(2) z(3)
*> f(x) = rho + --------- + ---------- + ---------
*> d(1)-x d(2)-x d(3)-x
*>
*> It is assumed that
*>
*> if ORGATI = .true. the root is between d(2) and d(3);
*> otherwise it is between d(1) and d(2)
*>
*> This routine will be called by DLAED4 when necessary. In most cases,
*> the root sought is the smallest in magnitude, though it might not be
*> in some extremely rare situations.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] KNITER
*> \verbatim
*> KNITER is INTEGER
*> Refer to DLAED4 for its significance.
*> \endverbatim
*>
*> \param[in] ORGATI
*> \verbatim
*> ORGATI is LOGICAL
*> If ORGATI is true, the needed root is between d(2) and
*> d(3); otherwise it is between d(1) and d(2). See
*> DLAED4 for further details.
*> \endverbatim
*>
*> \param[in] RHO
*> \verbatim
*> RHO is DOUBLE PRECISION
*> Refer to the equation f(x) above.
*> \endverbatim
*>
*> \param[in] D
*> \verbatim
*> D is DOUBLE PRECISION array, dimension (3)
*> D satisfies d(1) < d(2) < d(3).
*> \endverbatim
*>
*> \param[in] Z
*> \verbatim
*> Z is DOUBLE PRECISION array, dimension (3)
*> Each of the elements in z must be positive.
*> \endverbatim
*>
*> \param[in] FINIT
*> \verbatim
*> FINIT is DOUBLE PRECISION
*> The value of f at 0. It is more accurate than the one
*> evaluated inside this routine (if someone wants to do
*> so).
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*> TAU is DOUBLE PRECISION
*> The root of the equation f(x).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> > 0: if INFO = 1, failure to converge
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup auxOTHERcomputational
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> 10/02/03: This version has a few statements commented out for thread
*> safety (machine parameters are computed on each entry). SJH.
*>
*> 05/10/06: Modified from a new version of Ren-Cang Li, use
*> Gragg-Thornton-Warner cubic convergent scheme for better stability.
*> \endverbatim
*
*> \par Contributors:
* ==================
*>
*> Ren-Cang Li, Computer Science Division, University of California
*> at Berkeley, USA
*>
* =====================================================================
SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
LOGICAL ORGATI
INTEGER INFO, KNITER
DOUBLE PRECISION FINIT, RHO, TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( 3 ), Z( 3 )
* ..
*
* =====================================================================
*
* .. Parameters ..
INTEGER MAXIT
PARAMETER ( MAXIT = 40 )
DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
$ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Local Arrays ..
DOUBLE PRECISION DSCALE( 3 ), ZSCALE( 3 )
* ..
* .. Local Scalars ..
LOGICAL SCALE
INTEGER I, ITER, NITER
DOUBLE PRECISION A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
$ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
$ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4,
$ LBD, UBD
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
INFO = 0
*
IF( ORGATI ) THEN
LBD = D(2)
UBD = D(3)
ELSE
LBD = D(1)
UBD = D(2)
END IF
IF( FINIT .LT. ZERO )THEN
LBD = ZERO
ELSE
UBD = ZERO
END IF
*
NITER = 1
TAU = ZERO
IF( KNITER.EQ.2 ) THEN
IF( ORGATI ) THEN
TEMP = ( D( 3 )-D( 2 ) ) / TWO
C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
ELSE
TEMP = ( D( 1 )-D( 2 ) ) / TWO
C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
END IF
TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
A = A / TEMP
B = B / TEMP
C = C / TEMP
IF( C.EQ.ZERO ) THEN
TAU = B / A
ELSE IF( A.LE.ZERO ) THEN
TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
IF( TAU .LT. LBD .OR. TAU .GT. UBD )
$ TAU = ( LBD+UBD )/TWO
IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN
TAU = ZERO
ELSE
TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) +
$ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) +
$ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) )
IF( TEMP .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
IF( ABS( FINIT ).LE.ABS( TEMP ) )
$ TAU = ZERO
END IF
END IF
*
* get machine parameters for possible scaling to avoid overflow
*
* modified by Sven: parameters SMALL1, SMINV1, SMALL2,
* SMINV2, EPS are not SAVEd anymore between one call to the
* others but recomputed at each call
*
EPS = DLAMCH( 'Epsilon' )
BASE = DLAMCH( 'Base' )
SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) /
$ THREE ) )
SMINV1 = ONE / SMALL1
SMALL2 = SMALL1*SMALL1
SMINV2 = SMINV1*SMINV1
*
* Determine if scaling of inputs necessary to avoid overflow
* when computing 1/TEMP**3
*
IF( ORGATI ) THEN
TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
ELSE
TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
END IF
SCALE = .FALSE.
IF( TEMP.LE.SMALL1 ) THEN
SCALE = .TRUE.
IF( TEMP.LE.SMALL2 ) THEN
*
* Scale up by power of radix nearest 1/SAFMIN**(2/3)
*
SCLFAC = SMINV2
SCLINV = SMALL2
ELSE
*
* Scale up by power of radix nearest 1/SAFMIN**(1/3)
*
SCLFAC = SMINV1
SCLINV = SMALL1
END IF
*
* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
*
DO 10 I = 1, 3
DSCALE( I ) = D( I )*SCLFAC
ZSCALE( I ) = Z( I )*SCLFAC
10 CONTINUE
TAU = TAU*SCLFAC
LBD = LBD*SCLFAC
UBD = UBD*SCLFAC
ELSE
*
* Copy D and Z to DSCALE and ZSCALE
*
DO 20 I = 1, 3
DSCALE( I ) = D( I )
ZSCALE( I ) = Z( I )
20 CONTINUE
END IF
*
FC = ZERO
DF = ZERO
DDF = ZERO
DO 30 I = 1, 3
TEMP = ONE / ( DSCALE( I )-TAU )
TEMP1 = ZSCALE( I )*TEMP
TEMP2 = TEMP1*TEMP
TEMP3 = TEMP2*TEMP
FC = FC + TEMP1 / DSCALE( I )
DF = DF + TEMP2
DDF = DDF + TEMP3
30 CONTINUE
F = FINIT + TAU*FC
*
IF( ABS( F ).LE.ZERO )
$ GO TO 60
IF( F .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
*
* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent
* scheme
*
* It is not hard to see that
*
* 1) Iterations will go up monotonically
* if FINIT < 0;
*
* 2) Iterations will go down monotonically
* if FINIT > 0.
*
ITER = NITER + 1
*
DO 50 NITER = ITER, MAXIT
*
IF( ORGATI ) THEN
TEMP1 = DSCALE( 2 ) - TAU
TEMP2 = DSCALE( 3 ) - TAU
ELSE
TEMP1 = DSCALE( 1 ) - TAU
TEMP2 = DSCALE( 2 ) - TAU
END IF
A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
B = TEMP1*TEMP2*F
C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
A = A / TEMP
B = B / TEMP
C = C / TEMP
IF( C.EQ.ZERO ) THEN
ETA = B / A
ELSE IF( A.LE.ZERO ) THEN
ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
ELSE
ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
END IF
IF( F*ETA.GE.ZERO ) THEN
ETA = -F / DF
END IF
*
TAU = TAU + ETA
IF( TAU .LT. LBD .OR. TAU .GT. UBD )
$ TAU = ( LBD + UBD )/TWO
*
FC = ZERO
ERRETM = ZERO
DF = ZERO
DDF = ZERO
DO 40 I = 1, 3
IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN
TEMP = ONE / ( DSCALE( I )-TAU )
TEMP1 = ZSCALE( I )*TEMP
TEMP2 = TEMP1*TEMP
TEMP3 = TEMP2*TEMP
TEMP4 = TEMP1 / DSCALE( I )
FC = FC + TEMP4
ERRETM = ERRETM + ABS( TEMP4 )
DF = DF + TEMP2
DDF = DDF + TEMP3
ELSE
GO TO 60
END IF
40 CONTINUE
F = FINIT + TAU*FC
ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
$ ABS( TAU )*DF
IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR.
$ ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) ) )
$ GO TO 60
IF( F .LE. ZERO )THEN
LBD = TAU
ELSE
UBD = TAU
END IF
50 CONTINUE
INFO = 1
60 CONTINUE
*
* Undo scaling
*
IF( SCALE )
$ TAU = TAU*SCLINV
RETURN
*
* End of DLAED6
*
END

131
lib/linalg/dlaed7.cpp Normal file
View File

@ -0,0 +1,131 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__2 = 2;
static integer c__1 = 1;
static doublereal c_b10 = 1.;
static doublereal c_b11 = 0.;
static integer c_n1 = -1;
int dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl,
integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr,
integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work,
integer *iwork, integer *info)
{
integer q_dim1, q_offset, i__1, i__2;
integer pow_lmp_ii(integer *, integer *);
integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
integer indxc, indxp;
extern int dlaed8_(integer *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, integer *, doublereal *, integer *, doublereal *, doublereal *,
doublereal *, integer *, doublereal *, integer *, integer *, integer *,
doublereal *, integer *, integer *, integer *),
dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *),
dlaeda_(integer *, integer *, integer *, integer *, integer *, integer *, integer *,
integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *,
integer *);
integer idlmda;
extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *),
xerbla_(char *, integer *, ftnlen);
integer coltyp;
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--indxq;
--qstore;
--qptr;
--prmptr;
--perm;
--givptr;
givcol -= 3;
givnum -= 3;
--work;
--iwork;
*info = 0;
if (*icompq < 0 || *icompq > 1) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*icompq == 1 && *qsiz < *n) {
*info = -3;
} else if (*ldq < max(1, *n)) {
*info = -9;
} else if (min(1, *n) > *cutpnt || *n < *cutpnt) {
*info = -12;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAED7", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
if (*icompq == 1) {
ldq2 = *qsiz;
} else {
ldq2 = *n;
}
iz = 1;
idlmda = iz + *n;
iw = idlmda + *n;
iq2 = iw + *n;
is = iq2 + *n * ldq2;
indx = 1;
indxc = indx + *n;
coltyp = indxc + *n;
indxp = coltyp + *n;
ptr = pow_lmp_ii(&c__2, tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_lmp_ii(&c__2, &i__2);
}
curr = ptr + *curpbm;
dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &givcol[3], &givnum[3],
&qstore[1], &qptr[1], &work[iz], &work[iz + *n], info);
if (*curlvl == *tlvls) {
qptr[curr] = 1;
prmptr[curr] = 1;
givptr[curr] = 1;
}
dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, cutpnt, &work[iz],
&work[idlmda], &work[iq2], &ldq2, &work[iw], &perm[prmptr[curr]], &givptr[curr + 1],
&givcol[(givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp],
&iwork[indx], info);
prmptr[curr + 1] = prmptr[curr] + *n;
givptr[curr + 1] += givptr[curr];
if (k != 0) {
dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], &work[iw],
&qstore[qptr[curr]], &k, info);
if (*info != 0) {
goto L30;
}
if (*icompq == 1) {
dgemm_((char *)"N", (char *)"N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[qptr[curr]], &k,
&c_b11, &q[q_offset], ldq, (ftnlen)1, (ftnlen)1);
}
i__1 = k;
qptr[curr + 1] = qptr[curr] + i__1 * i__1;
n1 = k;
n2 = *n - k;
dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
} else {
qptr[curr + 1] = qptr[curr];
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
indxq[i__] = i__;
}
}
L30:
return 0;
}
#ifdef __cplusplus
}
#endif

Some files were not shown because too many files have changed in this diff Show More