diff --git a/lib/linalg/Makefile.gfortran b/lib/linalg/Makefile.gfortran index 28d1eee07f..89b7f2d7a0 100755 --- a/lib/linalg/Makefile.gfortran +++ b/lib/linalg/Makefile.gfortran @@ -1,4 +1,4 @@ -# * +# -*- makefile -*- # *_________________________________________________________________________* # * Minimal BLAS/LAPACK Library for use by other LAMMPS packages @@ -6,12 +6,7 @@ SHELL = /bin/sh # ------ FILES ------ -SRC = dasum.f daxpy.f dcopy.f ddot.f dgecon.f dgemm.f dgemv.f dger.f \ - dgetf2.f dgetrf.f dgetri.f disnan.f dlabad.f dlaisnan.f dlamch.f\ - dlacn2.f dlange.f dlassq.f dlaswp.f dlatrs.f drscl.f dscal.f \ - dswap.f dtrmm.f dtrmv.f dtrsm.f dtrsv.f dtrti2.f dtrtri.f \ - idamax.f ieeeck.f ilaenv.f iparmq.f lsame.f xerbla.f zdotc.f \ - zdscal.f zhpr.f zpptrf.f zpptri.f zscal.f ztpmv.f ztpsv.f ztptri.f +SRC = $(wildcard *.f) FILES = $(SRC) Makefile.* README diff --git a/lib/linalg/Makefile.mingw32-cross b/lib/linalg/Makefile.mingw32-cross index 3facd2b009..02aa3f71a3 100755 --- a/lib/linalg/Makefile.mingw32-cross +++ b/lib/linalg/Makefile.mingw32-cross @@ -1,30 +1,12 @@ -# * +# -*- makefile -*- # *_________________________________________________________________________* -# * Minimal BLAS/LAPACK Library for ATC and AWPMD - -# To compile and link LAMMPS to the linalg library generated by this Makefile, -# adjust the corresponding definitions in the library's Makefile.lammps file. -# -# for lib/atc/Makefile.lammps use: -# user-atc_SYSINC = -# user-atc_SYSLIB = ../../lib/linalg/$(LIBOBJDIR)liblinalg.a -lgfortran -# user-atc_SYSPATH = -# -# for lib/awpmd/Makefile.lammps use: -# user-awpmd_SYSINC = -# user-awpmd_SYSLIB = ../../lib/linalg/$(LIBOBJDIR)liblinalg.a -lgfortran -# user-awpmd_SYSPATH = +# * Minimal BLAS/LAPACK Library for use by other LAMMPS packages SHELL = /bin/sh # ------ FILES ------ -SRC = dasum.f daxpy.f dcopy.f ddot.f dgecon.f dgemm.f dgemv.f dger.f \ - dgetf2.f dgetrf.f dgetri.f disnan.f dlabad.f dlaisnan.f dlamch.f\ - dlacn2.f dlange.f dlassq.f dlaswp.f dlatrs.f drscl.f dscal.f \ - dswap.f dtrmm.f dtrmv.f dtrsm.f dtrsv.f dtrti2.f dtrtri.f \ - idamax.f ieeeck.f ilaenv.f iparmq.f lsame.f xerbla.f zdotc.f \ - zdscal.f zhpr.f zpptrf.f zpptri.f zscal.f ztpmv.f ztpsv.f ztptri.f +SRC = $(wildcard *.f) FILES = $(SRC) Makefile.* README diff --git a/lib/linalg/Makefile.mingw64-cross b/lib/linalg/Makefile.mingw64-cross index fa918a2287..ee6eef819b 100755 --- a/lib/linalg/Makefile.mingw64-cross +++ b/lib/linalg/Makefile.mingw64-cross @@ -1,30 +1,12 @@ -# * +# -*- makefile -*- # *_________________________________________________________________________* -# * Minimal BLAS/LAPACK Library for ATC and AWPMD - -# To compile and link LAMMPS to the linalg library generated by this Makefile, -# adjust the corresponding definitions in the library's Makefile.lammps file. -# -# for lib/atc/Makefile.lammps use: -# user-atc_SYSINC = -# user-atc_SYSLIB = ../../lib/linalg/$(LIBOBJDIR)liblinalg.a -lgfortran -# user-atc_SYSPATH = -# -# for lib/awpmd/Makefile.lammps use: -# user-awpmd_SYSINC = -# user-awpmd_SYSLIB = ../../lib/linalg/$(LIBOBJDIR)liblinalg.a -lgfortran -# user-awpmd_SYSPATH = +# * Minimal BLAS/LAPACK Library for use by other LAMMPS packages SHELL = /bin/sh # ------ FILES ------ -SRC = dasum.f daxpy.f dcopy.f ddot.f dgecon.f dgemm.f dgemv.f dger.f \ - dgetf2.f dgetrf.f dgetri.f disnan.f dlabad.f dlaisnan.f dlamch.f\ - dlacn2.f dlange.f dlassq.f dlaswp.f dlatrs.f drscl.f dscal.f \ - dswap.f dtrmm.f dtrmv.f dtrsm.f dtrsv.f dtrti2.f dtrtri.f \ - idamax.f ieeeck.f ilaenv.f iparmq.f lsame.f xerbla.f zdotc.f \ - zdscal.f zhpr.f zpptrf.f zpptri.f zscal.f ztpmv.f ztpsv.f ztptri.f +SRC = $(wildcard *.f) FILES = $(SRC) Makefile.* README @@ -77,7 +59,7 @@ $(DIR)dlamch.o: dlamch.f # ------ CLEAN ------ clean: - -rm $(DIR)*.o $(DOR)*.mod *~ $(LIB) + -rm $(DIR)*.o $(DIR)*.mod *~ $(LIB) -rmdir $(DIR) tar: diff --git a/lib/linalg/dbdsqr.f b/lib/linalg/dbdsqr.f new file mode 100644 index 0000000000..007e99779b --- /dev/null +++ b/lib/linalg/dbdsqr.f @@ -0,0 +1,850 @@ +*> \brief \b DBDSQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DBDSQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, +* LDU, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DBDSQR computes the singular values and, optionally, the right and/or +*> left singular vectors from the singular value decomposition (SVD) of +*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +*> zero-shift QR algorithm. The SVD of B has the form +*> +*> B = Q * S * P**T +*> +*> where S is the diagonal matrix of singular values, Q is an orthogonal +*> matrix of left singular vectors, and P is an orthogonal matrix of +*> right singular vectors. If left singular vectors are requested, this +*> subroutine actually returns U*Q instead of Q, and, if right singular +*> vectors are requested, this subroutine returns P**T*VT instead of +*> P**T, for given real input matrices U and VT. When U and VT are the +*> orthogonal matrices that reduce a general matrix A to bidiagonal +*> form: A = U*B*VT, as computed by DGEBRD, then +*> +*> A = (U*Q) * S * (P**T*VT) +*> +*> is the SVD of A. Optionally, the subroutine may also compute Q**T*C +*> for a given real input matrix C. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +*> no. 5, pp. 873-912, Sept 1990) and +*> "Accurate singular values and differential qd algorithms," by +*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +*> Department, University of California at Berkeley, July 1992 +*> for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> The number of columns of the matrix VT. NCVT >= 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> The number of rows of the matrix U. NRU >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B in decreasing +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the N-1 offdiagonal elements of the bidiagonal +*> matrix B. +*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +*> will contain the diagonal and superdiagonal elements of a +*> bidiagonal matrix orthogonally equivalent to the one given +*> as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) +*> On entry, an N-by-NCVT matrix VT. +*> On exit, VT is overwritten by P**T * VT. +*> Not referenced if NCVT = 0. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. +*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, N) +*> On entry, an NRU-by-N matrix U. +*> On exit, U is overwritten by U * Q. +*> Not referenced if NRU = 0. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,NRU). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, NCC) +*> On entry, an N-by-NCC matrix C. +*> On exit, C is overwritten by Q**T * C. +*> Not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0: +*> if NCVT = NRU = NCC = 0, +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 30*N +*> iterations (in inner while loop) +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> else NCVT = NRU = NCC = 0, +*> the algorithm did not converge; D and E contain the +*> elements of a bidiagonal matrix which is orthogonally +*> similar to the input matrix B; if INFO = i, i +*> elements of E have not converged to zero. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> If it is positive, TOLMUL*EPS is the desired relative +*> precision in the computed singular values. +*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the +*> desired absolute accuracy in the computed singular +*> values (corresponds to relative accuracy +*> abs(TOLMUL*EPS) in the largest singular value. +*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably +*> between 10 (for fast convergence) and .1/EPS +*> (for there to be some accuracy in the results). +*> Default is to lose at either one eighth or 2 of the +*> available decimal digits in each computed singular value +*> (whichever is smaller). +*> +*> MAXITR INTEGER, default = 6 +*> MAXITR controls the maximum number of passes of the +*> algorithm through its inner loop. The algorithms stops +*> (and so fails to converge) if the number of passes +*> through the inner loop exceeds MAXITR*N**2. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) + DOUBLE PRECISION HNDRTH + PARAMETER ( HNDRTH = 0.01D0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D0 ) + DOUBLE PRECISION HNDRD + PARAMETER ( HNDRD = 100.0D0 ) + DOUBLE PRECISION MEIGTH + PARAMETER ( MEIGTH = -0.125D0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT, + $ DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL DLASQ1( N, D, E, WORK, INFO ) +* +* If INFO equals 2, dqds didn't finish, try to finish +* + IF( INFO .NE. 2 ) RETURN + INFO = 0 + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( DBLE( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL DLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of DBDSQR +* + END diff --git a/lib/linalg/dgebd2.f b/lib/linalg/dgebd2.f new file mode 100644 index 0000000000..4b4dcc9641 --- /dev/null +++ b/lib/linalg/dgebd2.f @@ -0,0 +1,320 @@ +*> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBD2 reduces a real general m by n matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGEBD2 +* + END diff --git a/lib/linalg/dgebrd.f b/lib/linalg/dgebrd.f new file mode 100644 index 0000000000..6cb61f002f --- /dev/null +++ b/lib/linalg/dgebrd.f @@ -0,0 +1,353 @@ +*> \brief \b DGEBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBRD reduces a general real M-by-N matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,M,N). +*> For optimum performance LWORK >= (M+N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + DOUBLE PRECISION WS +* .. +* .. External Subroutines .. + EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y**T - X*U**T +* + CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of DGEBRD +* + END diff --git a/lib/linalg/dgelq2.f b/lib/linalg/dgelq2.f new file mode 100644 index 0000000000..0d64ba5210 --- /dev/null +++ b/lib/linalg/dgelq2.f @@ -0,0 +1,192 @@ +*> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQ2 computes an LQ factorization of a real m by n matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m by min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGELQ2 +* + END diff --git a/lib/linalg/dgelqf.f b/lib/linalg/dgelqf.f new file mode 100644 index 0000000000..d27b04ab1d --- /dev/null +++ b/lib/linalg/dgelqf.f @@ -0,0 +1,269 @@ +*> \brief \b DGELQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQF computes an LQ factorization of a real M-by-N matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGELQF +* + END diff --git a/lib/linalg/dgeqr2.f b/lib/linalg/dgeqr2.f new file mode 100644 index 0000000000..8e63db8866 --- /dev/null +++ b/lib/linalg/dgeqr2.f @@ -0,0 +1,192 @@ +*> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQR2 computes a QR factorization of a real m by n matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGEQR2 +* + END diff --git a/lib/linalg/dgeqrf.f b/lib/linalg/dgeqrf.f new file mode 100644 index 0000000000..2990257581 --- /dev/null +++ b/lib/linalg/dgeqrf.f @@ -0,0 +1,270 @@ +*> \brief \b DGEQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRF computes a QR factorization of a real M-by-N matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQRF +* + END diff --git a/lib/linalg/dgesvd.f b/lib/linalg/dgesvd.f new file mode 100644 index 0000000000..898570b669 --- /dev/null +++ b/lib/linalg/dgesvd.f @@ -0,0 +1,3493 @@ +*> \brief DGESVD computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVD computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U are returned in array U: +*> = 'S': the first min(m,n) columns of U (the left singular +*> vectors) are returned in the array U; +*> = 'O': the first min(m,n) columns of U (the left singular +*> vectors) are overwritten on the array A; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'A': all N rows of V**T are returned in the array VT; +*> = 'S': the first min(m,n) rows of V**T (the right singular +*> vectors) are returned in the array VT; +*> = 'O': the first min(m,n) rows of V**T (the right singular +*> vectors) are overwritten on the array A; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> +*> JOBVT and JOBU cannot both be 'O'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBU = 'O', A is overwritten with the first min(m,n) +*> columns of U (the left singular vectors, +*> stored columnwise); +*> if JOBVT = 'O', A is overwritten with the first min(m,n) +*> rows of V**T (the right singular vectors, +*> stored rowwise); +*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +*> are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,UCOL) +*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +*> if JOBU = 'S', U contains the first min(m,n) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBU = 'N' or 'O', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'S' or 'A', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT,N) +*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +*> V**T; +*> if JOBVT = 'S', VT contains the first min(m,n) rows of +*> V**T (the right singular vectors, stored rowwise); +*> if JOBVT = 'N' or 'O', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +*> superdiagonal elements of an upper bidiagonal matrix B +*> whose diagonal is in S (not necessarily sorted). B +*> satisfies A = U * B * VT, so it has the same singular values +*> as A, and singular vectors related by U and VT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): +*> - PATH 1 (M much larger than N, JOBU='N') +*> - PATH 1t (N much larger than M, JOBVT='N') +*> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if DBDSQR did not converge, INFO specifies how many +*> superdiagonals of an intermediate bidiagonal form B +*> did not converge to zero. See the description of WORK +*> above for details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEsing +* +* ===================================================================== + SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, + $ VT, LDVT, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M, + $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q, + $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSQR +* + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*N +* Compute space needed for DGEQRF + CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DGEQRF=DUM(1) +* Compute space needed for DORGQR + CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_N=DUM(1) + CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_M=DUM(1) +* Compute space needed for DGEBRD + CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD=DUM(1) +* Compute space needed for DORGBR P + CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P=DUM(1) +* Compute space needed for DORGBR Q + CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q=DUM(1) +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + LWORK_DGEQRF + MAXWRK = MAX( MAXWRK, 3*N+LWORK_DGEBRD ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD=DUM(1) + MAXWRK = 3*N + LWORK_DGEBRD + IF( WNTUS .OR. WNTUO ) THEN + CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q=DUM(1) + MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q ) + END IF + IF( WNTUA ) THEN + CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q=DUM(1) + MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q ) + END IF + IF( .NOT.WNTVN ) THEN + MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSQR +* + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*M +* Compute space needed for DGELQF + CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DGELQF=DUM(1) +* Compute space needed for DORGLQ + CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_N=DUM(1) + CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_M=DUM(1) +* Compute space needed for DGEBRD + CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD=DUM(1) +* Compute space needed for DORGBR P + CALL DORGBR( 'P', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P=DUM(1) +* Compute space needed for DORGBR Q + CALL DORGBR( 'Q', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q=DUM(1) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + LWORK_DGELQF + MAXWRK = MAX( MAXWRK, 3*M+LWORK_DGEBRD ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD=DUM(1) + MAXWRK = 3*M + LWORK_DGEBRD + IF( WNTVS .OR. WNTVO ) THEN +* Compute space needed for DORGBR P + CALL DORGBR( 'P', M, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P=DUM(1) + MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P ) + END IF + IF( WNTVA ) THEN + CALL DORGBR( 'P', N, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P=DUM(1) + MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P ) + END IF + IF( .NOT.WNTUN ) THEN + MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) +* + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If DBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGESVD +* + END diff --git a/lib/linalg/dlabrd.f b/lib/linalg/dlabrd.f new file mode 100644 index 0000000000..72d148119a --- /dev/null +++ b/lib/linalg/dlabrd.f @@ -0,0 +1,381 @@ +*> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLABRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, +* LDY ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLABRD reduces the first NB rows and columns of a real general +*> m by n matrix A to upper or lower bidiagonal form by an orthogonal +*> transformation Q**T * A * P, and returns the matrices X and Y which +*> are needed to apply the transformation to the unreduced part of A. +*> +*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +*> bidiagonal form. +*> +*> This is an auxiliary routine called by DGEBRD +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of leading rows and columns of A to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, the first NB rows and columns of the matrix are +*> overwritten; the rest of the array is unchanged. +*> If m >= n, elements on and below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors; and +*> elements above the diagonal in the first NB rows, with the +*> array TAUP, represent the orthogonal matrix P as a product +*> of elementary reflectors. +*> If m < n, elements below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors, and +*> elements on and above the diagonal in the first NB rows, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (NB) +*> The diagonal elements of the first NB rows and columns of +*> the reduced matrix. D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (NB) +*> The off-diagonal elements of the first NB rows and columns of +*> the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NB) +*> The m-by-nb matrix X required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,M). +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NB) +*> The n-by-nb matrix Y required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors. +*> +*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The elements of the vectors v and u together form the m-by-nb matrix +*> V and the nb-by-n matrix U**T which are needed, with X and Y, to apply +*> the transformation to the unreduced part of the matrix, using a block +*> update of the form: A := A - V*Y**T - X*U**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with nb = 2: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +*> ( v1 v2 a a a ) ( v1 1 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix which is unchanged, +*> vi denotes an element of the vector defining H(i), and ui an element +*> of the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLARFG, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DLABRD +* + END diff --git a/lib/linalg/dlacpy.f b/lib/linalg/dlacpy.f new file mode 100644 index 0000000000..a9a23c9454 --- /dev/null +++ b/lib/linalg/dlacpy.f @@ -0,0 +1,156 @@ +*> \brief \b DLACPY copies all or part of one two-dimensional array to another. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLACPY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLACPY copies all or part of a two-dimensional matrix A to another +*> matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper triangle +*> or trapezoid is accessed; if UPLO = 'L', only the lower +*> triangle or trapezoid is accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of DLACPY +* + END diff --git a/lib/linalg/dlae2.f b/lib/linalg/dlae2.f new file mode 100644 index 0000000000..302eeaa1f7 --- /dev/null +++ b/lib/linalg/dlae2.f @@ -0,0 +1,185 @@ +*> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAE2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, RT1, RT2 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, and RT2 +*> is the eigenvalue of smaller absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> The (1,2) and (2,1) elements of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is DOUBLE PRECISION +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is DOUBLE PRECISION +*> The eigenvalue of smaller absolute value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, RT1, RT2 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of DLAE2 +* + END diff --git a/lib/linalg/dlaed0.f b/lib/linalg/dlaed0.f new file mode 100644 index 0000000000..d8d7f53e1d --- /dev/null +++ b/lib/linalg/dlaed0.f @@ -0,0 +1,434 @@ +*> \brief \b DLAED0 used by sstedc. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED0 computes all eigenvalues and corresponding eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> = 2: Compute eigenvalues and eigenvectors of tridiagonal +*> matrix. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the main diagonal of the tridiagonal matrix. +*> On exit, its eigenvalues. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, Q must contain an N-by-N orthogonal matrix. +*> If ICOMPQ = 0 Q is not referenced. +*> If ICOMPQ = 1 On entry, Q is a subset of the columns of the +*> orthogonal matrix used to reduce the full +*> matrix to tridiagonal form corresponding to +*> the subset of the full matrix which is being +*> decomposed at this time. +*> If ICOMPQ = 2 On entry, Q will be the identity matrix. +*> On exit, Q contains the eigenvectors of the +*> tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If eigenvectors are +*> desired, then LDQ >= max(1,N). In any case, LDQ >= 1. +*> \endverbatim +*> +*> \param[out] QSTORE +*> \verbatim +*> QSTORE is DOUBLE PRECISION array, dimension (LDQS, N) +*> Referenced only when ICOMPQ = 1. Used to store parts of +*> the eigenvector matrix when the updating matrix multiplies +*> take place. +*> \endverbatim +*> +*> \param[in] LDQS +*> \verbatim +*> LDQS is INTEGER +*> The leading dimension of the array QSTORE. If ICOMPQ = 1, +*> then LDQS >= max(1,N). In any case, LDQS >= 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> If ICOMPQ = 0 or 1, the dimension of WORK must be at least +*> 1 + 3*N + 2*N*lg N + 3*N**2 +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of WORK must be at least +*> 4*N + N**2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> If ICOMPQ = 0 or 1, the dimension of IWORK must be at least +*> 6 + 6*N + 5*N*lg N. +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of IWORK must be at least +*> 3 + 5*N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1, + $ SPM2, SUBMAT, SUBPBS, TLVLS + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 + IF( ICOMPQ.NE.2 ) THEN +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( DBLE( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* +* Initialize pointers +* + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 + END IF +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + ELSE + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE, + $ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+ + $ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ), + $ LDQS ) + END IF + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. +* DLAED1 is used only for the full eigensystem of a tridiagonal +* matrix. +* DLAED7 handles the cases in which eigenvalues only or eigenvalues +* and eigenvectors of a full symmetric matrix (which was reduced to +* tridiagonal form) are desired. +* + IF( ICOMPQ.EQ.2 ) THEN + CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ), + $ LDQ, IWORK( INDXQ+SUBMAT ), + $ E( SUBMAT+MSD2-1 ), MSD2, WORK, + $ IWORK( SUBPBS+1 ), INFO ) + ELSE + CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ), + $ MSD2, WORK( IQ ), IWORK( IQPTR ), + $ IWORK( IPRMPT ), IWORK( IPERM ), + $ IWORK( IGIVPT ), IWORK( IGIVCL ), + $ WORK( IGIVNM ), WORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + END IF + IF( INFO.NE.0 ) + $ GO TO 130 + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + IF( ICOMPQ.EQ.1 ) THEN + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + ELSE IF( ICOMPQ.EQ.2 ) THEN + DO 110 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 ) + 110 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ ) + ELSE + DO 120 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + 120 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + END IF + GO TO 140 +* + 130 CONTINUE + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 +* + 140 CONTINUE + RETURN +* +* End of DLAED0 +* + END diff --git a/lib/linalg/dlaed1.f b/lib/linalg/dlaed1.f new file mode 100644 index 0000000000..c37c1d2100 --- /dev/null +++ b/lib/linalg/dlaed1.f @@ -0,0 +1,274 @@ +*> \brief \b DLAED1 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, INFO, LDQ, N +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER INDXQ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED1 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles +*> the case in which eigenvalues only or eigenvalues and eigenvectors +*> of a full symmetric matrix (which was reduced to tridiagonal form) +*> are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**T*u, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLAED2. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine DLAED4 (as called by DLAED3). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> On entry, the permutation which separately sorts the two +*> subproblems in D into ascending order. +*> On exit, the permutation which will reintegrate the +*> subproblems back into sorted order, +*> i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The subdiagonal entry used to create the rank-1 modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= CUTPNT <= N/2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N + N**2) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, INFO, LDQ, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER INDXQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS, + $ IW, IZ, K, N1, N2, ZPP1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED1', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are integer pointers which indicate +* the portion of the workspace +* used by a particular array in DLAED2 and DLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 ) + ZPP1 = CUTPNT + 1 + CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 ) +* +* Deflate eigenvalues. +* + CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ), + $ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ), + $ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ), + $ IWORK( COLTYP ), INFO ) +* + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT + + $ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2 + CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ), + $ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ), + $ WORK( IW ), WORK( IS ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + DO 10 I = 1, N + INDXQ( I ) = I + 10 CONTINUE + END IF +* + 20 CONTINUE + RETURN +* +* End of DLAED1 +* + END diff --git a/lib/linalg/dlaed2.f b/lib/linalg/dlaed2.f new file mode 100644 index 0000000000..a75d72a737 --- /dev/null +++ b/lib/linalg/dlaed2.f @@ -0,0 +1,539 @@ +*> \brief \b DLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, +* Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), +* $ INDXQ( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED2 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny entry in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. 0 <= K <=N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the eigenvalues of the two submatrices to +*> be combined. +*> On exit, D contains the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, Q contains the eigenvectors of two submatrices in +*> the two square blocks with corners at (1,1), (N1,N1) +*> and (N1+1, N1+1), (N,N). +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have N1 added to their +*> values. Destroyed on exit. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> DLAED3. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On entry, Z contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> On exit, the contents of Z have been destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> DLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first k values of the final deflation-altered z-vector +*> which will be passed to DLAED3. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) +*> A copy of the first K eigenvectors which will be used by +*> DLAED3 in a matrix multiply (DGEMM) to solve for the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of DLAMDA into +*> ascending order. +*> \endverbatim +*> +*> \param[out] INDXC +*> \verbatim +*> INDXC is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups: the first group contains non-zero +*> elements only at and above N1, the second contains +*> non-zero elements only below N1, and the third is dense. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] COLTYP +*> \verbatim +*> COLTYP is INTEGER array, dimension (N) +*> During execution, a label which will indicate which of the +*> following types a column in the Q2 matrix is: +*> 1 : non-zero in the upper half only; +*> 2 : dense; +*> 3 : non-zero in the lower half only; +*> 4 : deflated. +*> On exit, COLTYP(i) is the number of columns of type i, +*> for i=1 to 4 only. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, + $ Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), + $ INDXQ( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1, + $ N2, NJ, PJ + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1. Since z is the concatenation of +* two normalized vectors, norm2(z) = sqrt(2). +* + T = ONE / SQRT( TWO ) + CALL DSCAL( N, T, Z, 1 ) +* +* RHO = ABS( norm(z)**2 * RHO ) +* + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 10 I = N1P1, N + INDXQ( I ) = INDXQ( I ) + N1 + 10 CONTINUE +* +* re-integrate the deflated parts from the last pass +* + DO 20 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + 20 CONTINUE + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC ) + DO 30 I = 1, N + INDX( I ) = INDXQ( INDXC( I ) ) + 30 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IQ2 = 1 + DO 40 J = 1, N + I = INDX( J ) + CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 ) + DLAMDA( J ) = D( I ) + IQ2 = IQ2 + N + 40 CONTINUE + CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ ) + CALL DCOPY( N, DLAMDA, 1, D, 1 ) + GO TO 190 + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + DO 50 I = 1, N1 + COLTYP( I ) = 1 + 50 CONTINUE + DO 60 I = N1P1, N + COLTYP( I ) = 3 + 60 CONTINUE +* +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + NJ = INDX( J ) + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + IF( J.EQ.N ) + $ GO TO 100 + ELSE + PJ = NJ + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + NJ = INDX( J ) + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( PJ ) + C = Z( NJ ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( NJ ) - D( PJ ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( NJ ) = TAU + Z( PJ ) = ZERO + IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) + $ COLTYP( NJ ) = 2 + COLTYP( PJ ) = 4 + CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S ) + T = D( PJ )*C**2 + D( NJ )*S**2 + D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 + D( PJ ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = PJ + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = PJ + END IF + ELSE + INDXP( K2+I-1 ) = PJ + END IF + PJ = NJ + ELSE + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ + PJ = NJ + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four uniform groups (although one or more of these groups may be +* empty). +* + DO 110 J = 1, 4 + CTOT( J ) = 0 + 110 CONTINUE + DO 120 J = 1, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 120 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 1 + PSM( 2 ) = 1 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) + K = N - CTOT( 4 ) +* +* Fill out the INDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's. +* + DO 130 J = 1, N + JS = INDXP( J ) + CT = COLTYP( JS ) + INDX( PSM( CT ) ) = JS + INDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 130 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + I = 1 + IQ1 = 1 + IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1 + DO 140 J = 1, CTOT( 1 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + 140 CONTINUE +* + DO 150 J = 1, CTOT( 2 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + IQ2 = IQ2 + N2 + 150 CONTINUE +* + DO 160 J = 1, CTOT( 3 ) + JS = INDX( I ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ2 = IQ2 + N2 + 160 CONTINUE +* + IQ1 = IQ2 + DO 170 J = 1, CTOT( 4 ) + JS = INDX( I ) + CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 ) + IQ2 = IQ2 + N + Z( I ) = D( JS ) + I = I + 1 + 170 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, + $ Q( 1, K+1 ), LDQ ) + CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 ) + END IF +* +* Copy CTOT into COLTYP for referencing in DLAED3. +* + DO 180 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 180 CONTINUE +* + 190 CONTINUE + RETURN +* +* End of DLAED2 +* + END diff --git a/lib/linalg/dlaed3.f b/lib/linalg/dlaed3.f new file mode 100644 index 0000000000..411d0f890f --- /dev/null +++ b/lib/linalg/dlaed3.f @@ -0,0 +1,353 @@ +*> \brief \b DLAED3 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, +* CTOT, W, S, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER CTOT( * ), INDX( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ S( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED3 finds the roots of the secular equation, as defined by the +*> values in D, W, and RHO, between 1 and K. It makes the +*> appropriate calls to DLAED4 and then updates the eigenvectors by +*> multiplying the matrix of eigenvectors of the pair of eigensystems +*> being combined by the matrix of eigenvectors of the K-by-K system +*> which is solved here. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> DLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (deflation may result in N>K). +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading submatrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> D(I) contains the updated eigenvalues for +*> 1 <= I <= K. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> Initially the first K columns are used as workspace. +*> On output the columns 1 to K contain +*> the updated eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in,out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. May be changed on output by +*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP, +*> Cray-2, or Cray C-90, as described above. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2, N) +*> The first K columns of this matrix contain the non-deflated +*> eigenvectors for the split problem. +*> \endverbatim +*> +*> \param[in] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups (see DLAED2). +*> The rows of the eigenvectors found by DLAED4 must be likewise +*> permuted before the matrix multiply can take place. +*> \endverbatim +*> +*> \param[in] CTOT +*> \verbatim +*> CTOT is INTEGER array, dimension (4) +*> A count of the total number of the various types of columns +*> in Q, as described in INDX. The fourth column type is any +*> column which has been deflated. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. Destroyed on +*> output. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N1 + 1)*K +*> Will contain the eigenvectors of the repaired matrix which +*> will be multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, + $ CTOT, W, S, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), INDX( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ S( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, IQ2, J, N12, N2, N23 + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.K ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = 1, K + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 ) + $ GO TO 110 + IF( K.EQ.2 ) THEN + DO 30 J = 1, K + W( 1 ) = Q( 1, J ) + W( 2 ) = Q( 2, J ) + II = INDX( 1 ) + Q( 1, J ) = W( II ) + II = INDX( 2 ) + Q( 2, J ) = W( II ) + 30 CONTINUE + GO TO 110 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 60 J = 1, K + DO 40 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 40 CONTINUE + DO 50 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + 60 CONTINUE + DO 70 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I ) ) + 70 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 100 J = 1, K + DO 80 I = 1, K + S( I ) = W( I ) / Q( I, J ) + 80 CONTINUE + TEMP = DNRM2( K, S, 1 ) + DO 90 I = 1, K + II = INDX( I ) + Q( I, J ) = S( II ) / TEMP + 90 CONTINUE + 100 CONTINUE +* +* Compute the updated eigenvectors. +* + 110 CONTINUE +* + N2 = N - N1 + N12 = CTOT( 1 ) + CTOT( 2 ) + N23 = CTOT( 2 ) + CTOT( 3 ) +* + CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 ) + IQ2 = N1*N12 + 1 + IF( N23.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23, + $ ZERO, Q( N1+1, 1 ), LDQ ) + ELSE + CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ ) + END IF +* + CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 ) + IF( N12.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q, + $ LDQ ) + ELSE + CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ ) + END IF +* +* + 120 CONTINUE + RETURN +* +* End of DLAED3 +* + END diff --git a/lib/linalg/dlaed4.f b/lib/linalg/dlaed4.f new file mode 100644 index 0000000000..c898b5b618 --- /dev/null +++ b/lib/linalg/dlaed4.f @@ -0,0 +1,917 @@ +*> \brief \b DLAED4 used by sstedc. Finds a single root of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER I, INFO, N +* DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th updated eigenvalue of a symmetric +*> rank-one modification to a diagonal matrix whose elements are +*> given in the array d, and that +*> +*> D(i) < D(j) for i < j +*> +*> and that RHO > 0. This is arranged by the calling routine, and is +*> no loss in generality. The rank-one modified system is thus +*> +*> diag( D ) + RHO * Z * Z_transpose. +*> +*> where we assume the Euclidean norm of Z is 1. +*> +*> The method consists of approximating the rational functions in the +*> secular equation by simpler interpolating rational functions. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of all arrays. +*> \endverbatim +*> +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. 1 <= I <= N. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The original eigenvalues. It is assumed that they are in +*> order, D(I) < D(J) for I < J. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension (N) +*> If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th +*> component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 +*> for detail. The vector DELTA contains the information necessary +*> to construct the eigenvectors by DLAED3 and DLAED9. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is DOUBLE PRECISION +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, the updating process failed. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> Logical variable ORGATI (origin-at-i?) is used for distinguishing +*> whether D(i) or D(i+1) is treated as the origin. +*> +*> ORGATI = .true. origin at i +*> ORGATI = .false. origin at i+1 +*> +*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting +*> if we are working with THREE poles! +*> +*> MAXIT is the maximum number of iterations allowed for each +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0, + $ TEN = 10.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + DOUBLE PRECISION A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW, + $ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI, + $ RHOINV, TAU, TEMP, TEMP1, W +* .. +* .. Local Arrays .. + DOUBLE PRECISION ZZ( 3 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAED5, DLAED6 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 ) + DELTA( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL DLAED5( I, D, Z, DELTA, RHO, DLAM ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = DLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + MIDPT = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + DO 10 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / DELTA( II ) + + $ Z( N )*Z( N ) / DELTA( N ) +* + IF( W.LE.ZERO ) THEN + TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) + + $ Z( N )*Z( N ) / RHO + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +* + DLTLB = MIDPT + DLTUB = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +* + DLTLB = ZERO + DLTUB = MIDPT + END IF +* + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN +* ETA = B/A +* ETA = RHO - TAU + ETA = DLTUB - TAU + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 50 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 70 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + DLAM = D( I ) + TAU + GO TO 250 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DEL = D( IP1 ) - D( I ) + MIDPT = DEL / TWO + DO 100 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / DELTA( J ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / DELTA( I ) + + $ Z( IP1 )*Z( IP1 ) / DELTA( IP1 ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DEL + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = ZERO + DLTUB = MIDPT + ELSE +* +* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = -MIDPT + DLTUB = ZERO + END IF +* + IF( ORGATI ) THEN + DO 130 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 130 CONTINUE + ELSE + DO 140 J = 1, N + DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU + 140 CONTINUE + END IF + IF( ORGATI ) THEN + II = I + ELSE + II = I + 1 + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )* + $ ( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )* + $ ( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + PREW = W +* + DO 180 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 180 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 190 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 190 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 200 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 200 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* + TAU = TAU + ETA +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 240 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - + $ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + ELSE + TEMP = Z( II ) / DELTA( II ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )* + $ DELTA( IP1 )*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DELTA( I )*DELTA( I )*( DPSI+DPHI ) + END IF + ELSE + A = DELTA( I )*DELTA( I )*DPSI + + $ DELTA( IP1 )*DELTA( IP1 )*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + DO 210 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 210 CONTINUE +* + TAU = TAU + ETA + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 220 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 220 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 230 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 230 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 240 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF +* + END IF +* + 250 CONTINUE +* + RETURN +* +* End of DLAED4 +* + END diff --git a/lib/linalg/dlaed5.f b/lib/linalg/dlaed5.f new file mode 100644 index 0000000000..3ac9aa19a8 --- /dev/null +++ b/lib/linalg/dlaed5.f @@ -0,0 +1,189 @@ +*> \brief \b DLAED5 used by sstedc. Solves the 2-by-2 secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* .. Scalar Arguments .. +* INTEGER I +* DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th eigenvalue of a symmetric rank-one +*> modification of a 2-by-2 diagonal matrix +*> +*> diag( D ) + RHO * Z * transpose(Z) . +*> +*> The diagonal elements in the array D are assumed to satisfy +*> +*> D(i) < D(j) for i < j . +*> +*> We also assume RHO > 0 and that the Euclidean norm of the vector +*> Z is one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. I = 1 or I = 2. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (2) +*> The original eigenvalues. We assume D(1) < D(2). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (2) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension (2) +*> The vector DELTA contains the information necessary +*> to construct the eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is DOUBLE PRECISION +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER I + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION B, C, DEL, TAU, TEMP, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + IF( I.EQ.1 ) THEN + W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DEL +* +* B > ZERO, always +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) + DLAM = D( 1 ) + TAU + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + END IF + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End OF DLAED5 +* + END diff --git a/lib/linalg/dlaed6.f b/lib/linalg/dlaed6.f new file mode 100644 index 0000000000..1ce4932b8e --- /dev/null +++ b/lib/linalg/dlaed6.f @@ -0,0 +1,409 @@ +*> \brief \b DLAED6 used by sstedc. Computes one Newton step in solution of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL ORGATI +* INTEGER INFO, KNITER +* DOUBLE PRECISION FINIT, RHO, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( 3 ), Z( 3 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED6 computes the positive or negative root (closest to the origin) +*> of +*> z(1) z(2) z(3) +*> f(x) = rho + --------- + ---------- + --------- +*> d(1)-x d(2)-x d(3)-x +*> +*> It is assumed that +*> +*> if ORGATI = .true. the root is between d(2) and d(3); +*> otherwise it is between d(1) and d(2) +*> +*> This routine will be called by DLAED4 when necessary. In most cases, +*> the root sought is the smallest in magnitude, though it might not be +*> in some extremely rare situations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] KNITER +*> \verbatim +*> KNITER is INTEGER +*> Refer to DLAED4 for its significance. +*> \endverbatim +*> +*> \param[in] ORGATI +*> \verbatim +*> ORGATI is LOGICAL +*> If ORGATI is true, the needed root is between d(2) and +*> d(3); otherwise it is between d(1) and d(2). See +*> DLAED4 for further details. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> Refer to the equation f(x) above. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (3) +*> D satisfies d(1) < d(2) < d(3). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (3) +*> Each of the elements in z must be positive. +*> \endverbatim +*> +*> \param[in] FINIT +*> \verbatim +*> FINIT is DOUBLE PRECISION +*> The value of f at 0. It is more accurate than the one +*> evaluated inside this routine (if someone wants to do +*> so). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The root of the equation f(x). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, failure to converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 10/02/03: This version has a few statements commented out for thread +*> safety (machine parameters are computed on each entry). SJH. +*> +*> 05/10/06: Modified from a new version of Ren-Cang Li, use +*> Gragg-Thornton-Warner cubic convergent scheme for better stability. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + DOUBLE PRECISION FINIT, RHO, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 3 ), Z( 3 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Local Arrays .. + DOUBLE PRECISION DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL SCALE + INTEGER I, ITER, NITER + DOUBLE PRECISION A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4, + $ LBD, UBD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* + IF( ORGATI ) THEN + LBD = D(2) + UBD = D(3) + ELSE + LBD = D(1) + UBD = D(2) + END IF + IF( FINIT .LT. ZERO )THEN + LBD = ZERO + ELSE + UBD = ZERO + END IF +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD+UBD )/TWO + IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN + TAU = ZERO + ELSE + TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) + + $ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) + + $ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) ) + IF( TEMP .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF + END IF +* +* get machine parameters for possible scaling to avoid overflow +* +* modified by Sven: parameters SMALL1, SMINV1, SMALL2, +* SMINV2, EPS are not SAVEd anymore between one call to the +* others but recomputed at each call +* + EPS = DLAMCH( 'Epsilon' ) + BASE = DLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + LBD = LBD*SCLFAC + UBD = UBD*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF +* +* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent +* scheme +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TAU = TAU + ETA + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD + UBD )/TWO +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + ELSE + GO TO 60 + END IF + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ABS( F ).LE.EPS*ERRETM ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of DLAED6 +* + END diff --git a/lib/linalg/dlaed7.f b/lib/linalg/dlaed7.f new file mode 100644 index 0000000000..972c1bc590 --- /dev/null +++ b/lib/linalg/dlaed7.f @@ -0,0 +1,407 @@ +*> \brief \b DLAED7 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, +* LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, +* PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, +* $ QSIZ, TLVLS +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), +* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) +* DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), +* $ QSTORE( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED7 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and optionally eigenvectors of a dense symmetric matrix +*> that has been reduced to tridiagonal form. DLAED1 handles +*> the case in which all eigenvalues and eigenvectors of a symmetric +*> tridiagonal matrix are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**Tu, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLAED8. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine DLAED4 (as called by DLAED9). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= CURLVL <= TLVLS. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which will reintegrate the subproblem just +*> solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) +*> will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The subdiagonal element used to create the rank-1 +*> modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in,out] QSTORE +*> \verbatim +*> QSTORE is DOUBLE PRECISION array, dimension (N**2+1) +*> Stores eigenvectors of submatrices encountered during +*> divide and conquer, packed together. QPTR points to +*> beginning of the submatrices. +*> \endverbatim +*> +*> \param[in,out] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> List of indices pointing to beginning of submatrices stored +*> in QSTORE. The submatrices are numbered starting at the +*> bottom left of the divide and conquer tree, from left to +*> right and bottom to top. +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and also the size of +*> the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N+2*QSIZ*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, + $ PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, + $ QSIZ, TLVLS + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), + $ QSTORE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLAED8, DLAED9, DLAEDA, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLAED8 and DLAED9. +* + IF( ICOMPQ.EQ.1 ) THEN + LDQ2 = QSIZ + ELSE + LDQ2 = N + END IF +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N + IS = IQ2 + N*LDQ2 +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ), + $ WORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT, + $ WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2, + $ WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ), + $ IWORK( INDX ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL DLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ), + $ WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2, + $ QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ ) + END IF + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + 30 CONTINUE + RETURN +* +* End of DLAED7 +* + END diff --git a/lib/linalg/dlaed8.f b/lib/linalg/dlaed8.f new file mode 100644 index 0000000000..42b4ea1577 --- /dev/null +++ b/lib/linalg/dlaed8.f @@ -0,0 +1,524 @@ +*> \brief \b DLAED8 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, +* CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, +* GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, +* $ QSIZ +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), +* $ INDXQ( * ), PERM( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), +* $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED8 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny element in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the two submatrices to be +*> combined. On exit, the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If ICOMPQ = 0, Q is not referenced. Otherwise, +*> on entry, Q contains the eigenvectors of the partially solved +*> system which has been previously updated in matrix +*> multiplies with other partially solved eigensystems. +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have CUTPNT added to +*> their values in order to be accurate. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> DLAED3. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On entry, Z contains the updating vector (the last row of +*> the first sub-eigenvector matrix and the first row of the +*> second sub-eigenvector matrix). +*> On exit, the contents of Z are destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> DLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2,N) +*> If ICOMPQ = 0, Q2 is not referenced. Otherwise, +*> a copy of the first K eigenvectors which will be used by +*> DLAED7 in a matrix multiply (DGEMM) to update the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of the array Q2. LDQ2 >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first k values of the final deflation-altered z-vector and +*> will be passed to DLAED3. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N) +*> The permutations (from deflation and sorting) to be applied +*> to each eigenblock. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of D into ascending +*> order. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, + $ CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, + $ QSIZ + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), + $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -10 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED8', -INFO ) + RETURN + END IF +* +* Need to initialize GIVPTR to O here in case of quick exit +* to prevent an unspecified code behavior (usually sigfault) +* when IWORK array on entry to *stedc is not zeroed +* (or at least some IWORK entries which used in *laed7 for GIVPTR). +* + GIVPTR = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL DSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerence +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IF( ICOMPQ.EQ.0 ) THEN + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + 50 CONTINUE + ELSE + DO 60 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 60 CONTINUE + CALL DLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), + $ LDQ ) + END IF + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 110 + ELSE + JLAM = J + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + IF( ICOMPQ.EQ.1 ) THEN + CALL DROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + END IF + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 110 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + IF( ICOMPQ.EQ.0 ) THEN + DO 120 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + 120 CONTINUE + ELSE + DO 130 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 130 CONTINUE + END IF +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + ELSE + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL DLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, + $ Q( 1, K+1 ), LDQ ) + END IF + END IF +* + RETURN +* +* End of DLAED8 +* + END diff --git a/lib/linalg/dlaed9.f b/lib/linalg/dlaed9.f new file mode 100644 index 0000000000..8aa0687573 --- /dev/null +++ b/lib/linalg/dlaed9.f @@ -0,0 +1,294 @@ +*> \brief \b DLAED9 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED9 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, +* S, LDS, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), +* $ W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED9 finds the roots of the secular equation, as defined by the +*> values in D, Z, and RHO, between KSTART and KSTOP. It makes the +*> appropriate calls to DLAED4 and then stores the new matrix of +*> eigenvectors for use in calculating the next level of Z vectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> DLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] KSTART +*> \verbatim +*> KSTART is INTEGER +*> \endverbatim +*> +*> \param[in] KSTOP +*> \verbatim +*> KSTOP is INTEGER +*> The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP +*> are to be computed. 1 <= KSTART <= KSTOP <= K. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (delation may result in N > K). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> D(I) contains the updated eigenvalues +*> for KSTART <= I <= KSTOP. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max( 1, N ). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (LDS, K) +*> Will contain the eigenvectors of the repaired matrix which +*> will be stored for subsequent Z vector calculation and +*> multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of S. LDS >= max( 1, K ). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, + $ S, LDS, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), + $ W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN + INFO = -2 + ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.K ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDS.LT.MAX( 1, K ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, N + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = KSTART, KSTOP + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 .OR. K.EQ.2 ) THEN + DO 40 I = 1, K + DO 30 J = 1, K + S( J, I ) = Q( J, I ) + 30 CONTINUE + 40 CONTINUE + GO TO 120 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 70 J = 1, K + DO 50 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + DO 60 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 60 CONTINUE + 70 CONTINUE + DO 80 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) ) + 80 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 110 J = 1, K + DO 90 I = 1, K + Q( I, J ) = W( I ) / Q( I, J ) + 90 CONTINUE + TEMP = DNRM2( K, Q( 1, J ), 1 ) + DO 100 I = 1, K + S( I, J ) = Q( I, J ) / TEMP + 100 CONTINUE + 110 CONTINUE +* + 120 CONTINUE + RETURN +* +* End of DLAED9 +* + END diff --git a/lib/linalg/dlaeda.f b/lib/linalg/dlaeda.f new file mode 100644 index 0000000000..749a7c365a --- /dev/null +++ b/lib/linalg/dlaeda.f @@ -0,0 +1,308 @@ +*> \brief \b DLAEDA used by sstedc. Computes the Z vector determining the rank-one modification of the diagonal matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEDA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, +* GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), +* $ PRMPTR( * ), QPTR( * ) +* DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEDA computes the Z vector corresponding to the merge step in the +*> CURLVLth step of the merge process with TLVLS steps for the CURPBMth +*> problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= curlvl <= tlvls. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and incidentally the +*> size of the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (N**2) +*> Contains the square eigenblocks from previous levels, the +*> starting positions for blocks are given by QPTR. +*> \endverbatim +*> +*> \param[in] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> Contains a list of pointers which indicate where in Q an +*> eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates +*> the size of the block. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On output this vector contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> \endverbatim +*> +*> \param[out] ZTEMP +*> \verbatim +*> ZTEMP is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), + $ PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2, + $ PTR, ZPTR1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAEDA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine location of first number in second half. +* + MID = N / 2 + 1 +* +* Gather last/first rows of appropriate eigenblocks into center of Z +* + PTR = 1 +* +* Determine location of lowest level subproblem in the full storage +* scheme +* + CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these square +* roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) ) + DO 10 K = 1, MID - BSIZ1 - 1 + Z( K ) = ZERO + 10 CONTINUE + CALL DCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1, + $ Z( MID-BSIZ1 ), 1 ) + CALL DCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 ) + DO 20 K = MID + BSIZ2, N + Z( K ) = ZERO + 20 CONTINUE +* +* Loop through remaining levels 1 -> CURLVL applying the Givens +* rotations and permutation and then multiplying the center matrices +* against the current Z. +* + PTR = 2**TLVLS + 1 + DO 70 K = 1, CURLVL - 1 + CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1 + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + ZPTR1 = MID - PSIZ1 +* +* Apply Givens at CURR and CURR+1 +* + DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1 + CALL DROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1, + $ Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 30 CONTINUE + DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1 + CALL DROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1, + $ Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 40 CONTINUE + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + DO 50 I = 0, PSIZ1 - 1 + ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 ) + 50 CONTINUE + DO 60 I = 0, PSIZ2 - 1 + ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 ) + 60 CONTINUE +* +* Multiply Blocks at CURR and CURR+1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these +* square roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+ + $ 1 ) ) ) ) + IF( BSIZ1.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ), + $ BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 ) + END IF + CALL DCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ), + $ 1 ) + IF( BSIZ2.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ), + $ BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 ) + END IF + CALL DCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1, + $ Z( MID+BSIZ2 ), 1 ) +* + PTR = PTR + 2**( TLVLS-K ) + 70 CONTINUE +* + RETURN +* +* End of DLAEDA +* + END diff --git a/lib/linalg/dlaev2.f b/lib/linalg/dlaev2.f new file mode 100644 index 0000000000..2e333ddf2c --- /dev/null +++ b/lib/linalg/dlaev2.f @@ -0,0 +1,238 @@ +*> \brief \b DLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +*> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +*> eigenvector for RT1, giving the decomposition +*> +*> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +*> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> The (1,2) element and the conjugate of the (2,1) element of +*> the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is DOUBLE PRECISION +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is DOUBLE PRECISION +*> The eigenvalue of smaller absolute value. +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is DOUBLE PRECISION +*> The vector (CS1, SN1) is a unit right eigenvector for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> CS1 and SN1 are accurate to a few ulps barring over/underflow. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + DOUBLE PRECISION AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of DLAEV2 +* + END diff --git a/lib/linalg/dlamrg.f b/lib/linalg/dlamrg.f new file mode 100644 index 0000000000..7126053e8a --- /dev/null +++ b/lib/linalg/dlamrg.f @@ -0,0 +1,171 @@ +*> \brief \b DLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single set sorted in ascending order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAMRG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) +* +* .. Scalar Arguments .. +* INTEGER DTRD1, DTRD2, N1, N2 +* .. +* .. Array Arguments .. +* INTEGER INDEX( * ) +* DOUBLE PRECISION A( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMRG will create a permutation list which will merge the elements +*> of A (which is composed of two independently sorted sets) into a +*> single set which is sorted in ascending order. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> These arguements contain the respective lengths of the two +*> sorted lists to be merged. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (N1+N2) +*> The first N1 elements of A contain a list of numbers which +*> are sorted in either ascending or descending order. Likewise +*> for the final N2 elements. +*> \endverbatim +*> +*> \param[in] DTRD1 +*> \verbatim +*> DTRD1 is INTEGER +*> \endverbatim +*> +*> \param[in] DTRD2 +*> \verbatim +*> DTRD2 is INTEGER +*> These are the strides to be taken through the array A. +*> Allowable strides are 1 and -1. They indicate whether a +*> subset of A is sorted in ascending (DTRDx = 1) or descending +*> (DTRDx = -1) order. +*> \endverbatim +*> +*> \param[out] INDEX +*> \verbatim +*> INDEX is INTEGER array, dimension (N1+N2) +*> On exit this array will contain a permutation such that +*> if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +*> sorted in ascending order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER DTRD1, DTRD2, N1, N2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + DOUBLE PRECISION A( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( DTRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( DTRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of DLAMRG +* + END diff --git a/lib/linalg/dlanst.f b/lib/linalg/dlanst.f new file mode 100644 index 0000000000..213b06ada0 --- /dev/null +++ b/lib/linalg/dlanst.f @@ -0,0 +1,186 @@ +*> \brief \b DLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANST returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric tridiagonal matrix A. +*> \endverbatim +*> +*> \return DLANST +*> \verbatim +*> +*> DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANST as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANST is +*> set to zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) sub-diagonal or super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + SUM = ABS( D( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + SUM = ABS( E( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) + SUM = ABS( E( N-1 ) )+ABS( D( N ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + DO 20 I = 2, N - 1 + SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL DLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL DLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + DLANST = ANORM + RETURN +* +* End of DLANST +* + END diff --git a/lib/linalg/dlansy.f b/lib/linalg/dlansy.f new file mode 100644 index 0000000000..bc70ab8eda --- /dev/null +++ b/lib/linalg/dlansy.f @@ -0,0 +1,241 @@ +*> \brief \b DLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANSY returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A. +*> \endverbatim +*> +*> \return DLANSY +*> \verbatim +*> +*> DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANSY as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANSY is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleSYauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL DLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSY = VALUE + RETURN +* +* End of DLANSY +* + END diff --git a/lib/linalg/dlapy2.f b/lib/linalg/dlapy2.f new file mode 100644 index 0000000000..d43b0d5d14 --- /dev/null +++ b/lib/linalg/dlapy2.f @@ -0,0 +1,104 @@ +*> \brief \b DLAPY2 returns sqrt(x2+y2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +*> overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> X and Y specify the values x and y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + DLAPY2 = W + ELSE + DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of DLAPY2 +* + END diff --git a/lib/linalg/dlarf.f b/lib/linalg/dlarf.f new file mode 100644 index 0000000000..80dca69af7 --- /dev/null +++ b/lib/linalg/dlarf.f @@ -0,0 +1,227 @@ +*> \brief \b DLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARF applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILADLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILADLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + CALL DGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T +* + CALL DGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL DGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T +* + CALL DGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of DLARF +* + END diff --git a/lib/linalg/dlarfb.f b/lib/linalg/dlarfb.f new file mode 100644 index 0000000000..17218478af --- /dev/null +++ b/lib/linalg/dlarfb.f @@ -0,0 +1,758 @@ +*> \brief \b DLARFB applies a block reflector or its transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFB applies a real block reflector H or its transpose H**T to a +*> real m by n matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The triangular k by k matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC, lastv2 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLR( M, K, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C1**T +* + DO 10 J = 1, K + CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T *V2 +* + CALL DGEMM( 'Transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLR( N, K, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTC = ILADLC( M, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C2**T +* + DO 70 J = 1, K + CALL DCOPY( LASTC, C( M-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( M-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T*V1 +* + CALL DGEMM( 'Transpose', 'No transpose', + $ LASTC, K, M-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ M-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( M-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( M-K+J, I ) = C( M-K+J, I ) - WORK(I, J) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTC = ILADLR( M, N, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL DCOPY( LASTC, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( N-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, K, N-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( N-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, N-K+J ) = C( I, N-K+J ) - WORK(I, J) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLC( K, M, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C1**T +* + DO 130 J = 1, K + CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T*V2**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLC( K, N, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTC = ILADLC( M, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C2**T +* + DO 190 J = 1, K + CALL DCOPY( LASTC, C( M-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, M-K+1 ), LDV, + $ WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T * V1**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTC, K, M-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ M-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, M-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( M-K+J, I ) = C( M-K+J, I ) - WORK(I, J) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTC = ILADLR( M, N, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL DCOPY( LASTC, C( 1, N-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, N-K+1 ), LDV, + $ WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, K, N-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, N-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, N-K+J ) = C( I, N-K+J ) - WORK(I, J) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of DLARFB +* + END diff --git a/lib/linalg/dlarfg.f b/lib/linalg/dlarfg.f new file mode 100644 index 0000000000..ce91d33c1a --- /dev/null +++ b/lib/linalg/dlarfg.f @@ -0,0 +1,196 @@ +*> \brief \b DLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFG generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, and x is an (n-1)-element real +*> vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> +*> Otherwise 1 <= tau <= 2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 + EXTERNAL DLAMCH, DLAPY2, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + 10 CONTINUE + KNT = KNT + 1 + CALL DSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DNRM2( N-1, X, INCX ) + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + TAU = ( BETA-ALPHA ) / BETA + CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of DLARFG +* + END diff --git a/lib/linalg/dlarft.f b/lib/linalg/dlarft.f new file mode 100644 index 0000000000..bc1b53b2ce --- /dev/null +++ b/lib/linalg/dlarft.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/lib/linalg/dlartg.f b/lib/linalg/dlartg.f new file mode 100644 index 0000000000..bf74c4365c --- /dev/null +++ b/lib/linalg/dlartg.f @@ -0,0 +1,204 @@ +*> \brief \b DLARTG generates a plane rotation with real cosine and real sine. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARTG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARTG( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARTG generate a plane rotation so that +*> +*> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a slower, more accurate version of the BLAS1 routine DROTG, +*> with the following other differences: +*> F and G are unchanged on return. +*> If G=0, then CS=1 and SN=0. +*> If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +*> floating point operations (saves work in DBDSQR when +*> there are zeros on the diagonal). +*> +*> If F exceeds G in magnitude, CS will be positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is DOUBLE PRECISION +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION +*> The nonzero component of the rotated vector. +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of DLARTG +* + END diff --git a/lib/linalg/dlas2.f b/lib/linalg/dlas2.f new file mode 100644 index 0000000000..81077f940d --- /dev/null +++ b/lib/linalg/dlas2.f @@ -0,0 +1,183 @@ +*> \brief \b DLAS2 computes singular values of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION F, G, H, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAS2 computes the singular values of the 2-by-2 matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, SSMIN is the smaller singular value and SSMAX is the +*> larger singular value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> The smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is DOUBLE PRECISION +*> The larger singular value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Barring over/underflow, all output quantities are correct to within +*> a few units in the last place (ulps), even in the absence of a guard +*> digit in addition/subtraction. +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows, or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION F, G, H, SSMAX, SSMIN +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of DLAS2 +* + END diff --git a/lib/linalg/dlascl.f b/lib/linalg/dlascl.f new file mode 100644 index 0000000000..9b9b33c0c1 --- /dev/null +++ b/lib/linalg/dlascl.f @@ -0,0 +1,364 @@ +*> \brief \b DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASCL multiplies the M by N real matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See DGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is DOUBLE PRECISION +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. DISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( DISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DLASCL +* + END diff --git a/lib/linalg/dlaset.f b/lib/linalg/dlaset.f new file mode 100644 index 0000000000..1ce34662ab --- /dev/null +++ b/lib/linalg/dlaset.f @@ -0,0 +1,184 @@ +*> \brief \b DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASET + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASET initializes an m-by-n matrix A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set; the strictly lower +*> triangular part of A is not changed. +*> = 'L': Lower triangular part is set; the strictly upper +*> triangular part of A is not changed. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> The constant to which the offdiagonal elements are to be set. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> The constant to which the diagonal elements are to be set. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On exit, the leading m-by-n submatrix of A is set as follows: +*> +*> if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +*> if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +*> otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +*> +*> and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. +* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of DLASET +* + END diff --git a/lib/linalg/dlasq1.f b/lib/linalg/dlasq1.f new file mode 100644 index 0000000000..d12fb7a5fd --- /dev/null +++ b/lib/linalg/dlasq1.f @@ -0,0 +1,224 @@ +*> \brief \b DLASQ1 computes the singular values of a real square bidiagonal matrix. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ1 computes the singular values of a real N-by-N bidiagonal +*> matrix with diagonal D and off-diagonal E. The singular values +*> are computed to high relative accuracy, in the absence of +*> denormalization, underflow and overflow. The algorithm was first +*> presented in +*> +*> "Accurate singular values and differential qd algorithms" by K. V. +*> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +*> 1994, +*> +*> and the present implementation is described in "An implementation of +*> the dqds Algorithm (Positive Case)", LAPACK Working Note. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the diagonal elements of the +*> bidiagonal matrix whose SVD is desired. On normal exit, +*> D contains the singular values in decreasing order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, elements E(1:N-1) contain the off-diagonal elements +*> of the bidiagonal matrix whose SVD is desired. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop) On exit D and E +*> represent a matrix with the same singular values +*> which the calling subroutine could use to finish the +*> computation, or even feed back into DLASQ1 +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + DOUBLE PRECISION EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAS2, DLASCL, DLASQ2, DLASRT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + CALL XERBLA( 'DLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL DLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL DLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). +* + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL DCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL DLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + ELSE IF( INFO.EQ.2 ) THEN +* +* Maximum number of iterations exceeded. Move data from WORK +* into D and E so the calling subroutine can try to finish +* + DO I = 1, N + D( I ) = SQRT( WORK( 2*I-1 ) ) + E( I ) = SQRT( WORK( 2*I ) ) + END DO + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, E, N, IINFO ) + END IF +* + RETURN +* +* End of DLASQ1 +* + END diff --git a/lib/linalg/dlasq2.f b/lib/linalg/dlasq2.f new file mode 100644 index 0000000000..df1690d020 --- /dev/null +++ b/lib/linalg/dlasq2.f @@ -0,0 +1,582 @@ +*> \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ2( N, Z, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ2 computes all the eigenvalues of the symmetric positive +*> definite tridiagonal matrix associated with the qd array Z to high +*> relative accuracy are computed to high relative accuracy, in the +*> absence of denormalization, underflow and overflow. +*> +*> To see the relation of Z to the tridiagonal matrix, let L be a +*> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +*> let U be an upper bidiagonal matrix with 1's above and diagonal +*> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +*> symmetric tridiagonal to which it is similar. +*> +*> Note : DLASQ2 defines a logical variable, IEEE, which is true +*> on machines which follow ieee-754 floating-point standard in their +*> handling of infinities and NaNs, and false otherwise. This variable +*> is passed to DLASQ3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> On entry Z holds the qd array. On exit, entries 1 to N hold +*> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +*> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +*> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +*> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +*> shifts that failed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if the i-th argument is a scalar and had an illegal +*> value, then INFO = -i, if the i-th argument is an +*> array and the j-entry had an illegal value, then +*> INFO = -(i*100+j) +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop). On exit Z holds +*> a qd array with the same eigenvalues as the given Z. +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Local Variables: I0:N0 defines a current unreduced segment of Z. +*> The shifts are accumulated in SIGMA. Iteration count is in ITER. +*> Ping-pong is controlled by PP (alternates between 0 and 1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASQ2( N, Z, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0, + $ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, + $ K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT, + $ TTYPE + DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX, + $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL, + $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ +* .. +* .. External Subroutines .. + EXTERNAL DLASQ3, DLASRT, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case DLASQ2 is not called by DLASQ1) +* + INFO = 0 + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'DLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. +* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL DLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* + IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. +* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* +* Initialise variables to pass to DLASQ3. +* + TTYPE = 0 + DMIN1 = ZERO + DMIN2 = ZERO + DN = ZERO + DN1 = ZERO + DN2 = ZERO + G = ZERO + TAU = ZERO +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 160 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 170 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 + PP = 0 +* + IF( N0-I0.GT.1 ) THEN + DEE = Z( 4*I0-3 ) + DEEMIN = DEE + KMIN = I0 + DO 110 I4 = 4*I0+1, 4*N0-3, 4 + DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) ) + IF( DEE.LE.DEEMIN ) THEN + DEEMIN = DEE + KMIN = ( I4+3 )/4 + END IF + 110 CONTINUE + IF( (KMIN-I0)*2.LT.N0-KMIN .AND. + $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN + IPN4 = 4*( I0+N0 ) + PP = 2 + DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-2 ) + Z( I4-2 ) = Z( IPN4-I4-2 ) + Z( IPN4-I4-2 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + TEMP = Z( I4 ) + Z( I4 ) = Z( IPN4-I4-4 ) + Z( IPN4-I4-4 ) = TEMP + 120 CONTINUE + END IF + END IF +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. +* PP = 0 for ping, PP = 1 for pong. +* PP = 2 indicates that flipping was applied to the Z array and +* and that the tests for deflation upon entry in DLASQ3 +* should not be performed. +* + NBIG = 100*( N0-I0+1 ) + DO 140 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 150 +* +* While submatrix unfinished take a good dqds step. +* + CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. + $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 130 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 130 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 140 CONTINUE +* + INFO = 2 +* +* Maximum number of iterations exceeded, restore the shift +* SIGMA and place the new d's and e's in a qd array. +* This might need to be done for several blocks +* + I1 = I0 + N1 = N0 + 145 CONTINUE + TEMPQ = Z( 4*I0-3 ) + Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA + DO K = I0+1, N0 + TEMPE = Z( 4*K-5 ) + Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 )) + TEMPQ = Z( 4*K-3 ) + Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 ) + END DO +* +* Prepare to do this on the previous block if there is one +* + IF( I1.GT.1 ) THEN + N1 = I1-1 + DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) ) + I1 = I1 - 1 + END DO + SIGMA = -Z(4*N1-1) + GO TO 145 + END IF + + DO K = 1, N + Z( 2*K-1 ) = Z( 4*K-3 ) +* +* Only the block 1..N0 is unfinished. The rest of the e's +* must be essentially zero, although sometimes other data +* has been stored in them. +* + IF( K.LT.N0 ) THEN + Z( 2*K ) = Z( 4*K-1 ) + ELSE + Z( 2*K ) = 0 + END IF + END DO + RETURN +* +* end IWHILB +* + 150 CONTINUE +* + 160 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 170 CONTINUE +* +* Move q's to the front. +* + DO 180 K = 2, N + Z( K ) = Z( 4*K-3 ) + 180 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL DLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 190 K = N, 1, -1 + E = E + Z( K ) + 190 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = DBLE( ITER ) + Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER ) + RETURN +* +* End of DLASQ2 +* + END diff --git a/lib/linalg/dlasq3.f b/lib/linalg/dlasq3.f new file mode 100644 index 0000000000..d49d1c5939 --- /dev/null +++ b/lib/linalg/dlasq3.f @@ -0,0 +1,421 @@ +*> \brief \b DLASQ3 checks for deflation, computes a shift and calls dqds. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, +* ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, +* DN2, G, TAU ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, ITER, N0, NDIV, NFAIL, PP +* DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, +* $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +*> In case of failure it changes shifts, and tries again until output +*> is positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in,out] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in,out] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> PP=2 indicates that flipping was applied to the Z array +*> and that the initial tests for deflation should not be +*> performed. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> Sum of shifts used in current segment. +*> \endverbatim +*> +*> \param[in,out] DESIG +*> \verbatim +*> DESIG is DOUBLE PRECISION +*> Lower order part of SIGMA +*> \endverbatim +*> +*> \param[in] QMAX +*> \verbatim +*> QMAX is DOUBLE PRECISION +*> Maximum value of q. +*> \endverbatim +*> +*> \param[out] NFAIL +*> \verbatim +*> NFAIL is INTEGER +*> Number of times shift was too big. +*> \endverbatim +*> +*> \param[out] ITER +*> \verbatim +*> ITER is INTEGER +*> Number of iterations. +*> \endverbatim +*> +*> \param[out] NDIV +*> \verbatim +*> NDIV is INTEGER +*> Number of divisions. +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). +*> \endverbatim +*> +*> \param[in,out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN1 +*> \verbatim +*> DN1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN2 +*> \verbatim +*> DN2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> +*> These are passed as arguments in order to save their values +*> between calls to DLASQ3. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, + $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0D0, QURTR = 0.250D0, HALF = 0.5D0, + $ ONE = 1.0D0, TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + DOUBLE PRECISION EPS, S, T, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL DLASQ4, DLASQ5, DLASQ6 +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + LOGICAL DISNAN + EXTERNAL DISNAN, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = DLAMCH( 'Precision' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2.AND.T.NE.ZERO ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE + IF( PP.EQ.2 ) + $ PP = 0 +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* +* Choose a shift. +* + CALL DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE, G ) +* +* Call dqds until DMIN > 0. +* + 70 CONTINUE +* + CALL DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE, EPS ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GE.ZERO ) THEN +* +* Success. +* + GO TO 90 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 90 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 70 + ELSE IF( DISNAN( DMIN ) ) THEN +* +* NaN. +* + IF( TAU.EQ.ZERO ) THEN + GO TO 80 + ELSE + TAU = ZERO + GO TO 70 + END IF + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 80 + END IF +* +* Risk of underflow. +* + 80 CONTINUE + CALL DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 90 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of DLASQ3 +* + END diff --git a/lib/linalg/dlasq4.f b/lib/linalg/dlasq4.f new file mode 100644 index 0000000000..97d9bdeba3 --- /dev/null +++ b/lib/linalg/dlasq4.f @@ -0,0 +1,425 @@ +*> \brief \b DLASQ4 computes an approximation to the smallest eigenvalue using values of d from the previous transform. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, +* DN1, DN2, TAU, TTYPE, G ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, N0IN, PP, TTYPE +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ4 computes an approximation TAU to the smallest eigenvalue +*> using values of d from the previous transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] N0IN +*> \verbatim +*> N0IN is INTEGER +*> The value of N0 at start of EIGTEST. +*> \endverbatim +*> +*> \param[in] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[in] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[in] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[in] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N) +*> \endverbatim +*> +*> \param[in] DN1 +*> \verbatim +*> DN1 is DOUBLE PRECISION +*> d(N-1) +*> \endverbatim +*> +*> \param[in] DN2 +*> \verbatim +*> DN2 is DOUBLE PRECISION +*> d(N-2) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> This is the shift. +*> \endverbatim +*> +*> \param[out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is REAL +*> G is passed as an argument in order to save its value between +*> calls to DLASQ4. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CNST1 = 9/16 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE, G ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630D0, CNST2 = 1.010D0, + $ CNST3 = 1.050D0 ) + DOUBLE PRECISION QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250D0, THIRD = 0.3330D0, + $ HALF = 0.50D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + DOUBLE PRECISION A2, B1, B2, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + B2 = Z( NP-2 ) + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. +* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. +* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of DLASQ4 +* + END diff --git a/lib/linalg/dlasq5.f b/lib/linalg/dlasq5.f new file mode 100644 index 0000000000..cdd8cf1ae3 --- /dev/null +++ b/lib/linalg/dlasq5.f @@ -0,0 +1,410 @@ +*> \brief \b DLASQ5 computes one dqds transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2, IEEE, EPS ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, N0, PP +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, SIGMA, EPS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ5 computes one dqds transform in ping-pong form, one +*> version for IEEE machines another for non IEEE machines. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> This is the shift. +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> This is the accumulated shift up to this step. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is DOUBLE PRECISION +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is DOUBLE PRECISION +*> d(N0-2). +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic. +*> \endverbatim +* +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> This is the value of epsilon used. +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, + $ DN, DNM1, DNM2, IEEE, EPS ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, + $ SIGMA, EPS +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO, HALF + PARAMETER ( ZERO = 0.0D0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, TEMP, DTHRESH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + DTHRESH = EPS*(SIGMA+TAU) + IF( TAU.LT.DTHRESH*HALF ) TAU = ZERO + IF( TAU.NE.ZERO ) THEN + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF + ELSE +* This is the version that sets d's to zero if they are small enough + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 50 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 50 CONTINUE + ELSE + DO 60 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 60 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 70 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + IF( D.LT.DTHRESH) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 70 CONTINUE + ELSE + DO 80 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + IF( D.LT.DTHRESH) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 80 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF + END IF +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ5 +* + END diff --git a/lib/linalg/dlasq6.f b/lib/linalg/dlasq6.f new file mode 100644 index 0000000000..3c8661bbba --- /dev/null +++ b/lib/linalg/dlasq6.f @@ -0,0 +1,254 @@ +*> \brief \b DLASQ6 computes one dqd transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2 ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, PP +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ6 computes one dqd (shift equal to zero) transform in +*> ping-pong form, with protection against underflow and overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is DOUBLE PRECISION +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is DOUBLE PRECISION +*> d(N0-2). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = DLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ6 +* + END diff --git a/lib/linalg/dlasr.f b/lib/linalg/dlasr.f new file mode 100644 index 0000000000..645d03b3d8 --- /dev/null +++ b/lib/linalg/dlasr.f @@ -0,0 +1,436 @@ +*> \brief \b DLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASR applies a sequence of plane rotations to a real matrix A, +*> from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DLASR +* + END diff --git a/lib/linalg/dlasrt.f b/lib/linalg/dlasrt.f new file mode 100644 index 0000000000..f5d0e6cd1a --- /dev/null +++ b/lib/linalg/dlasrt.f @@ -0,0 +1,303 @@ +*> \brief \b DLASRT sorts numbers in increasing or decreasing order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASRT( ID, N, D, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ID +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Sort the numbers in D in increasing order (if ID = 'I') or +*> in decreasing order (if ID = 'D' ). +*> +*> Use Quick Sort, reverting to Insertion sort on arrays of +*> size <= 20. Dimension of STACK limits N to about 2**32. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ID +*> \verbatim +*> ID is CHARACTER*1 +*> = 'I': sort D in increasing order; +*> = 'D': sort D in decreasing order. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the array D. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the array to be sorted. +*> On exit, D has been sorted into increasing order +*> (D(1) <= ... <= D(N) ) or into decreasing order +*> (D(1) >= ... >= D(N) ), depending on ID. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASRT( ID, N, D, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. + INTEGER DIR, ENDD, I, J, START, STKPNT + DOUBLE PRECISION D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input paramters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of DLASRT +* + END diff --git a/lib/linalg/dlasv2.f b/lib/linalg/dlasv2.f new file mode 100644 index 0000000000..96aaa1e45c --- /dev/null +++ b/lib/linalg/dlasv2.f @@ -0,0 +1,325 @@ +*> \brief \b DLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASV2 computes the singular value decomposition of a 2-by-2 +*> triangular matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +*> smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +*> right singular vectors for abs(SSMAX), giving the decomposition +*> +*> [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +*> [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> abs(SSMIN) is the smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is DOUBLE PRECISION +*> abs(SSMAX) is the larger singular value. +*> \endverbatim +*> +*> \param[out] SNL +*> \verbatim +*> SNL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] CSL +*> \verbatim +*> CSL is DOUBLE PRECISION +*> The vector (CSL, SNL) is a unit left singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +*> +*> \param[out] SNR +*> \verbatim +*> SNR is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] CSR +*> \verbatim +*> CSR is DOUBLE PRECISION +*> The vector (CSR, SNR) is a unit right singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Any input parameter may be aliased with any output parameter. +*> +*> Barring over/underflow and assuming a guard digit in subtraction, all +*> output quantities are correct to within a few units in the last +*> place (ulps). +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION FOUR + PARAMETER ( FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + DOUBLE PRECISION A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. + IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.DLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of DLASV2 +* + END diff --git a/lib/linalg/dlatrd.f b/lib/linalg/dlatrd.f new file mode 100644 index 0000000000..69ec0018be --- /dev/null +++ b/lib/linalg/dlatrd.f @@ -0,0 +1,336 @@ +*> \brief \b DLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an orthogonal similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRD reduces NB rows and columns of a real symmetric matrix A to +*> symmetric tridiagonal form by an orthogonal similarity +*> transformation Q**T * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', DLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', DLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by DSYTRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= (1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a symmetric rank-2k update of the form: +*> A := A - V*W**T - W*V**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DGEMV, DLARFG, DSCAL, DSYMV +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL DGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL DGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL DLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL DSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL DGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL DSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*DDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL DAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL DSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*DDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of DLATRD +* + END diff --git a/lib/linalg/dnrm2.f b/lib/linalg/dnrm2.f new file mode 100644 index 0000000000..5ea257a200 --- /dev/null +++ b/lib/linalg/dnrm2.f @@ -0,0 +1,112 @@ +*> \brief \b DNRM2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX) +* +* .. Scalar Arguments .. +* INTEGER INCX,N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DNRM2 returns the euclidean norm of a vector via the function +*> name, so that +*> +*> DNRM2 := sqrt( x'*x ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> -- This version written on 25-October-1982. +*> Modified on 14-October-1993 to inline the call to DLASSQ. +*> Sven Hammarling, Nag Ltd. +*> \endverbatim +*> +* ===================================================================== + DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION X(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ABSXI,NORM,SCALE,SSQ + INTEGER IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,SQRT +* .. + IF (N.LT.1 .OR. INCX.LT.1) THEN + NORM = ZERO + ELSE IF (N.EQ.1) THEN + NORM = ABS(X(1)) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10 IX = 1,1 + (N-1)*INCX,INCX + IF (X(IX).NE.ZERO) THEN + ABSXI = ABS(X(IX)) + IF (SCALE.LT.ABSXI) THEN + SSQ = ONE + SSQ* (SCALE/ABSXI)**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + (ABSXI/SCALE)**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE*SQRT(SSQ) + END IF +* + DNRM2 = NORM + RETURN +* +* End of DNRM2. +* + END diff --git a/lib/linalg/dorg2r.f b/lib/linalg/dorg2r.f new file mode 100644 index 0000000000..86df6dddc7 --- /dev/null +++ b/lib/linalg/dorg2r.f @@ -0,0 +1,200 @@ +*> \brief \b DORG2R generates all or part of the orthogonal matrix Q from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORG2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORG2R generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL DSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2R +* + END diff --git a/lib/linalg/dorgbr.f b/lib/linalg/dorgbr.f new file mode 100644 index 0000000000..ddfa7262a0 --- /dev/null +++ b/lib/linalg/dorgbr.f @@ -0,0 +1,338 @@ +*> \brief \b DORGBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGBR generates one of the real orthogonal matrices Q or P**T +*> determined by DGEBRD when reducing a real matrix A to bidiagonal +*> form: A = Q * B * P**T. Q and P**T are defined as products of +*> elementary reflectors H(i) or G(i) respectively. +*> +*> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +*> is of order M: +*> if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n +*> columns of Q, where m >= n >= k; +*> if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an +*> M-by-M matrix. +*> +*> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +*> is of order N: +*> if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m +*> rows of P**T, where n >= m >= k; +*> if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as +*> an N-by-N matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether the matrix Q or the matrix P**T is +*> required, as defined in the transformation applied by DGEBRD: +*> = 'Q': generate Q; +*> = 'P': generate P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q or P**T to be returned. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q or P**T to be returned. +*> N >= 0. +*> If VECT = 'Q', M >= N >= min(M,K); +*> if VECT = 'P', N >= M >= min(N,K). +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original M-by-K +*> matrix reduced by DGEBRD. +*> If VECT = 'P', the number of rows in the original K-by-N +*> matrix reduced by DGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by DGEBRD. +*> On exit, the M-by-N matrix Q or P**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension +*> (min(M,K)) if VECT = 'Q' +*> (min(N,K)) if VECT = 'P' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i), which determines Q or P**T, as +*> returned by DGEBRD in its array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,min(M,N)). +*> For optimum performance LWORK >= min(M,N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORGLQ, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = 1 + IF( WANTQ ) THEN + IF( M.GE.K ) THEN + CALL DORGQR( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( M.GT.1 ) THEN + CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + ELSE + IF( K.LT.N ) THEN + CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( N.GT.1 ) THEN + CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + END IF + LWKOPT = WORK( 1 ) + LWKOPT = MAX (LWKOPT, MN) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to DGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P**T, determined by a call to DGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P**T to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P**T(2:n,2:n) +* + CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGBR +* + END diff --git a/lib/linalg/dorgl2.f b/lib/linalg/dorgl2.f new file mode 100644 index 0000000000..3e8398b73f --- /dev/null +++ b/lib/linalg/dorgl2.f @@ -0,0 +1,204 @@ +*> \brief \b DORGL2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGL2 generates an m by n real matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by DGELQF in the first k rows of its array argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL DSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORGL2 +* + END diff --git a/lib/linalg/dorglq.f b/lib/linalg/dorglq.f new file mode 100644 index 0000000000..88aec15005 --- /dev/null +++ b/lib/linalg/dorglq.f @@ -0,0 +1,289 @@ +*> \brief \b DORGLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGLQ generates an M-by-N real matrix Q with orthonormal rows, +*> which is defined as the first M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by DGELQF in the first k rows of its array argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL DORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H**T to columns i:n of current block +* + CALL DORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGLQ +* + END diff --git a/lib/linalg/dorgqr.f b/lib/linalg/dorgqr.f new file mode 100644 index 0000000000..404ab184e6 --- /dev/null +++ b/lib/linalg/dorgqr.f @@ -0,0 +1,290 @@ +*> \brief \b DORGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGQR generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL DORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL DORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQR +* + END diff --git a/lib/linalg/dorm2l.f b/lib/linalg/dorm2l.f new file mode 100644 index 0000000000..3ff25869a7 --- /dev/null +++ b/lib/linalg/dorm2l.f @@ -0,0 +1,278 @@ +*> \brief \b DORM2L multiplies a general matrix by the orthogonal matrix from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORM2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORM2L overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T * C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQLF in the last k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + $ WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2L +* + END diff --git a/lib/linalg/dorm2r.f b/lib/linalg/dorm2r.f new file mode 100644 index 0000000000..b13f12d53c --- /dev/null +++ b/lib/linalg/dorm2r.f @@ -0,0 +1,282 @@ +*> \brief \b DORM2R multiplies a general matrix by the orthogonal matrix from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORM2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORM2R overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQRF in the first k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2R +* + END diff --git a/lib/linalg/dormbr.f b/lib/linalg/dormbr.f new file mode 100644 index 0000000000..7a0d9b9038 --- /dev/null +++ b/lib/linalg/dormbr.f @@ -0,0 +1,372 @@ +*> \brief \b DORMBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, VECT +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': P * C C * P +*> TRANS = 'T': P**T * C C * P**T +*> +*> Here Q and P**T are the orthogonal matrices determined by DGEBRD when +*> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +*> P**T are defined as products of elementary reflectors H(i) and G(i) +*> respectively. +*> +*> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +*> order of the orthogonal matrix Q or P**T that is applied. +*> +*> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +*> if nq >= k, Q = H(1) H(2) . . . H(k); +*> if nq < k, Q = H(1) H(2) . . . H(nq-1). +*> +*> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +*> if k < nq, P = G(1) G(2) . . . G(k); +*> if k >= nq, P = G(1) G(2) . . . G(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'Q': apply Q or Q**T; +*> = 'P': apply P or P**T. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q, Q**T, P or P**T from the Left; +*> = 'R': apply Q, Q**T, P or P**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q or P; +*> = 'T': Transpose, apply Q**T or P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original +*> matrix reduced by DGEBRD. +*> If VECT = 'P', the number of rows in the original +*> matrix reduced by DGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,min(nq,K)) if VECT = 'Q' +*> (LDA,nq) if VECT = 'P' +*> The vectors which define the elementary reflectors H(i) and +*> G(i), whose products determine the matrices Q and P, as +*> returned by DGEBRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If VECT = 'Q', LDA >= max(1,nq); +*> if VECT = 'P', LDA >= max(1,min(nq,K)). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(nq,K)) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i) which determines Q or P, as returned +*> by DGEBRD in the array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +*> or P*C or P**T*C or C*P or C*P**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMLQ, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to DGEBRD with nq >= k +* + CALL DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to DGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to DGEBRD with nq > k +* + CALL DORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to DGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMBR +* + END diff --git a/lib/linalg/dorml2.f b/lib/linalg/dorml2.f new file mode 100644 index 0000000000..9ae2396e12 --- /dev/null +++ b/lib/linalg/dorml2.f @@ -0,0 +1,282 @@ +*> \brief \b DORML2 multiplies a general matrix by the orthogonal matrix from a LQ factorization determined by sgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORML2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORML2 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQF in the first k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORML2 +* + END diff --git a/lib/linalg/dormlq.f b/lib/linalg/dormlq.f new file mode 100644 index 0000000000..ebbd4d26e2 --- /dev/null +++ b/lib/linalg/dormlq.f @@ -0,0 +1,354 @@ +*> \brief \b DORMLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQF in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMLQ +* + END diff --git a/lib/linalg/dormql.f b/lib/linalg/dormql.f new file mode 100644 index 0000000000..96c6f1958e --- /dev/null +++ b/lib/linalg/dormql.f @@ -0,0 +1,348 @@ +*> \brief \b DORMQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMQL overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQLF in the last k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE +* +* Determine the block size. NB may be at most NBMAX, where +* NBMAX is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**T is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQL +* + END diff --git a/lib/linalg/dormqr.f b/lib/linalg/dormqr.f new file mode 100644 index 0000000000..c0767ecf61 --- /dev/null +++ b/lib/linalg/dormqr.f @@ -0,0 +1,347 @@ +*> \brief \b DORMQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQRF in the first k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQR +* + END diff --git a/lib/linalg/dormtr.f b/lib/linalg/dormtr.f new file mode 100644 index 0000000000..00fff4dda2 --- /dev/null +++ b/lib/linalg/dormtr.f @@ -0,0 +1,310 @@ +*> \brief \b DORMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMTR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by DSYTRD: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from DSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from DSYTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by DSYTRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSYTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMQL, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* + CALL DORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMTR +* + END diff --git a/lib/linalg/dpotf2.f b/lib/linalg/dpotf2.f new file mode 100644 index 0000000000..6003e19b05 --- /dev/null +++ b/lib/linalg/dpotf2.f @@ -0,0 +1,230 @@ +*> \brief \b DPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U , if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T *U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T *U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( J, 1 ), LDA, A( J, 1 ), + $ LDA ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of DPOTF2 +* + END diff --git a/lib/linalg/dpotrf.f b/lib/linalg/dpotrf.f new file mode 100644 index 0000000000..3457230b56 --- /dev/null +++ b/lib/linalg/dpotrf.f @@ -0,0 +1,246 @@ +*> \brief \b DPOTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL DPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL DGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1, + $ J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ), + $ LDA, ONE, A( J, J+JB ), LDA ) + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ), + $ LDA, ONE, A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of DPOTRF +* + END diff --git a/lib/linalg/drot.f b/lib/linalg/drot.f new file mode 100644 index 0000000000..1615ef6a87 --- /dev/null +++ b/lib/linalg/drot.f @@ -0,0 +1,101 @@ +*> \brief \b DROT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION C,S +* INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION DX(*),DY(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DROT applies a plane rotation. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> jack dongarra, linpack, 3/11/78. +*> modified 12/3/93, array(1) declarations changed to array(*) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION C,S + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*),DY(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DTEMP + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN +* +* code for both increments equal to 1 +* + DO I = 1,N + DTEMP = C*DX(I) + S*DY(I) + DY(I) = C*DY(I) - S*DX(I) + DX(I) = DTEMP + END DO + ELSE +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO I = 1,N + DTEMP = C*DX(IX) + S*DY(IY) + DY(IY) = C*DY(IY) - S*DX(IX) + DX(IX) = DTEMP + IX = IX + INCX + IY = IY + INCY + END DO + END IF + RETURN + END diff --git a/lib/linalg/dstedc.f b/lib/linalg/dstedc.f new file mode 100644 index 0000000000..b59e1c3bbd --- /dev/null +++ b/lib/linalg/dstedc.f @@ -0,0 +1,489 @@ +*> \brief \b DSTEBZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEDC computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> The eigenvectors of a full or band real symmetric matrix can also be +*> found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See DLAED3 for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> = 'V': Compute eigenvectors of original dense symmetric +*> matrix also. On entry, Z contains the orthogonal +*> matrix used to reduce the original matrix to +*> tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the subdiagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LWORK must be at least +*> ( 1 + 3*N + 2*N*lg N + 4*N**2 ), +*> where lg( N ) = smallest integer k such +*> that 2**k >= N. +*> If COMPZ = 'I' and N > 1 then LWORK must be at least +*> ( 1 + 4*N + N**2 ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LWORK need +*> only be max(1,2*(N-1)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LIWORK must be at least +*> ( 6 + 6*N + 5*N*lg N ). +*> If COMPZ = 'I' and N > 1 then LIWORK must be at least +*> ( 3 + 5*N ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LIWORK +*> need only be 1. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FINISH, I, ICOMPZ, II, J, K, LGN, LIWMIN, + $ LWMIN, M, SMLSIZ, START, STOREZ, STRTRW + DOUBLE PRECISION EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLAED0, DLASCL, DLASET, DLASRT, + $ DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MOD, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. + $ ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + SMLSIZ = ILAENV( 9, 'DSTEDC', ' ', 0, 0, 0, 0 ) + IF( N.LE.1 .OR. ICOMPZ.EQ.0 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( N.LE.SMLSIZ ) THEN + LIWMIN = 1 + LWMIN = 2*( N - 1 ) + ELSE + LGN = INT( LOG( DBLE( N ) )/LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT. LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT. LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEDC', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures DSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. If the conditional clause is removed, then +* information on the size of workspace needs to be changed. +* +* If COMPZ = 'N', use DSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + GO TO 50 + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN +* + CALL DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* + ELSE +* +* If COMPZ = 'V', the Z matrix must be stored elsewhere for later +* use. +* + IF( ICOMPZ.EQ.1 ) THEN + STOREZ = 1 + N*N + ELSE + STOREZ = 1 + END IF +* + IF( ICOMPZ.EQ.2 ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) + END IF +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ GO TO 50 +* + EPS = DLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 10 CONTINUE + IF( START.LE.N ) THEN +* +* Let FINISH be the position of the next subdiagonal entry +* such that E( FINISH ) <= TINY or FINISH = N if no such +* subdiagonal exists. The matrix identified by the elements +* between START and FINISH constitutes an independent +* sub-problem. +* + FINISH = START + 20 CONTINUE + IF( FINISH.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( FINISH ) ) )* + $ SQRT( ABS( D( FINISH+1 ) ) ) + IF( ABS( E( FINISH ) ).GT.TINY ) THEN + FINISH = FINISH + 1 + GO TO 20 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = FINISH - START + 1 + IF( M.EQ.1 ) THEN + START = FINISH + 1 + GO TO 10 + END IF + IF( M.GT.SMLSIZ ) THEN +* +* Scale. +* + ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + IF( ICOMPZ.EQ.1 ) THEN + STRTRW = 1 + ELSE + STRTRW = START + END IF + CALL DLAED0( ICOMPZ, N, M, D( START ), E( START ), + $ Z( STRTRW, START ), LDZ, WORK( 1 ), N, + $ WORK( STOREZ ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + GO TO 50 + END IF +* +* Scale back. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + IF( ICOMPZ.EQ.1 ) THEN +* +* Since QR won't update a Z matrix which is larger than +* the length of D, we must solve the sub-problem in a +* workspace and then multiply back into Z. +* + CALL DSTEQR( 'I', M, D( START ), E( START ), WORK, M, + $ WORK( M*M+1 ), INFO ) + CALL DLACPY( 'A', N, M, Z( 1, START ), LDZ, + $ WORK( STOREZ ), N ) + CALL DGEMM( 'N', 'N', N, M, M, ONE, + $ WORK( STOREZ ), N, WORK, M, ZERO, + $ Z( 1, START ), LDZ ) + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL DSTEQR( 'I', M, D( START ), E( START ), + $ Z( START, START ), LDZ, WORK, INFO ) + ELSE + CALL DSTERF( M, D( START ), E( START ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + INFO = START*( N+1 ) + FINISH + GO TO 50 + END IF + END IF +* + START = FINISH + 1 + GO TO 10 + END IF +* +* endwhile +* +* If the problem split any number of times, then the eigenvalues +* will not be properly ordered. Here we permute the eigenvalues +* (and the associated eigenvectors) into ascending order. +* + IF( M.NE.N ) THEN + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 40 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 30 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 30 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 40 CONTINUE + END IF + END IF + END IF +* + 50 CONTINUE + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSTEDC +* + END diff --git a/lib/linalg/dsteqr.f b/lib/linalg/dsteqr.f new file mode 100644 index 0000000000..9e165bb6bb --- /dev/null +++ b/lib/linalg/dsteqr.f @@ -0,0 +1,572 @@ +*> \brief \b DSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band symmetric matrix can also be found +*> if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to +*> tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> symmetric matrix. On entry, Z must contain the +*> orthogonal matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is orthogonally similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASET, DLASR, + $ DLASRT, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL DLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL DLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL DLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL DLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of DSTEQR +* + END diff --git a/lib/linalg/dsterf.f b/lib/linalg/dsterf.f new file mode 100644 index 0000000000..b93cc13dd6 --- /dev/null +++ b/lib/linalg/dsterf.f @@ -0,0 +1,426 @@ +*> \brief \b DSTERF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTERF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTERF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTERF computes all eigenvalues of a symmetric tridiagonal matrix +*> using the Pal-Walker-Kahan variant of the QL or QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed to find all of the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTERF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + DOUBLE PRECISION ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN, RMAX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLASCL, DLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 + RMAX = DLAMCH( 'O' ) +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( (ANORM.GT.SSFMAX) ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL DLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL DLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL DLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of DSTERF +* + END diff --git a/lib/linalg/dsyevd.f b/lib/linalg/dsyevd.f new file mode 100644 index 0000000000..3c9545ac31 --- /dev/null +++ b/lib/linalg/dsyevd.f @@ -0,0 +1,357 @@ +*> \brief DSYEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVD computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> +*> Because of large use of BLAS of level 3, DSYEVD needs N**2 more +*> workspace than DSYEVX. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n + + +*> +* ===================================================================== + SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, + $ DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = MAX( LWMIN, 2*N + + $ ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) ) + LIOPT = LIWMIN + END IF + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYEVD +* + END diff --git a/lib/linalg/dsygs2.f b/lib/linalg/dsygs2.f new file mode 100644 index 0000000000..644dcfff1b --- /dev/null +++ b/lib/linalg/dsygs2.f @@ -0,0 +1,283 @@ +*> \brief \b DSYGS2 reduces a symmetric definite generalized eigenproblem to standard form, using the factorization results obtained from spotrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGS2 reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. +*> +*> B must have been previously factorized as U**T *U or L*L**T by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T *A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored, and how B has been factorized. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + DOUBLE PRECISION AKK, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSYR2, DTRMV, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**T *A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB, + $ A( K, 1 ), LDA ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSCAL( K-1, BKK, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DSYGS2 +* + END diff --git a/lib/linalg/dsygst.f b/lib/linalg/dsygst.f new file mode 100644 index 0000000000..f1d5311c9a --- /dev/null +++ b/lib/linalg/dsygst.f @@ -0,0 +1,321 @@ +*> \brief \b DSYGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGST reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +*> +*> B must have been previously factorized as U**T*U or L*L**T by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**T*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL DSYGS2, DSYMM, DSYR2K, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DSYGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Left', UPLO, 'Transpose', 'Non-unit', + $ KB, N-K-KB+1, ONE, B( K, K ), LDB, + $ A( K, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, + $ A( K, K+KB ), LDA, B( K, K+KB ), LDB, + $ ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, ONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ N-K-KB+1, KB, ONE, B( K, K ), LDB, + $ A( K+KB, K ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -ONE, A( K+KB, K ), LDA, B( K+KB, K ), + $ LDB, ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, ONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, ONE, B, LDB, A( 1, K ), LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', K-1, KB, ONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DTRMM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ K-1, KB, ONE, B( K, K ), LDB, A( 1, K ), + $ LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L**T*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, ONE, B, LDB, A( K, 1 ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', K-1, KB, ONE, + $ A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, + $ K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of DSYGST +* + END diff --git a/lib/linalg/dsygvd.f b/lib/linalg/dsygvd.f new file mode 100644 index 0000000000..171aa175ff --- /dev/null +++ b/lib/linalg/dsygvd.f @@ -0,0 +1,380 @@ +*> \brief \b DSYGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be symmetric and B is also positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK >= 1. +*> If JOBZ = 'N' and N > 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEVD returned an error code: +*> <= N: if INFO = i and JOBZ = 'N', then the algorithm +*> failed to converge; i off-diagonal elements of an +*> intermediate tridiagonal form did not converge to +*> zero; +*> if INFO = i and JOBZ = 'V', then the algorithm +*> failed to compute an eigenvalue while working on +*> the submatrix lying in rows and columns INFO/(N+1) +*> through mod(INFO,N+1); +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified so that no backsubstitution is performed if DSYEVD fails to +*> converge (NEIG in old code could be greater than N causing out of +*> bounds reference to A - reported by Ralf Meyer). Also corrected the +*> description of INFO and the test on ITYPE. Sven, 16 Feb 05. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +*> +* ===================================================================== + SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEVD, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) ) + LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYGVD +* + END diff --git a/lib/linalg/dsymm.f b/lib/linalg/dsymm.f new file mode 100644 index 0000000000..ee8df4df4b --- /dev/null +++ b/lib/linalg/dsymm.f @@ -0,0 +1,367 @@ +*> \brief \b DSYMM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA,BETA +* INTEGER LDA,LDB,LDC,M,N +* CHARACTER SIDE,UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYMM performs one of the matrix-matrix operations +*> +*> C := alpha*A*B + beta*C, +*> +*> or +*> +*> C := alpha*B*A + beta*C, +*> +*> where alpha and beta are scalars, A is a symmetric matrix and B and +*> C are m by n matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether the symmetric matrix A +*> appears on the left or right in the operation as follows: +*> +*> SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +*> +*> SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the symmetric matrix A is to be +*> referenced as follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of the +*> symmetric matrix is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of the +*> symmetric matrix is to be referenced. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix C. +*> M must be at least zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix C. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> m when SIDE = 'L' or 'l' and is n otherwise. +*> Before entry with SIDE = 'L' or 'l', the m by m part of +*> the array A must contain the symmetric matrix, such that +*> when UPLO = 'U' or 'u', the leading m by m upper triangular +*> part of the array A must contain the upper triangular part +*> of the symmetric matrix and the strictly lower triangular +*> part of A is not referenced, and when UPLO = 'L' or 'l', +*> the leading m by m lower triangular part of the array A +*> must contain the lower triangular part of the symmetric +*> matrix and the strictly upper triangular part of A is not +*> referenced. +*> Before entry with SIDE = 'R' or 'r', the n by n part of +*> the array A must contain the symmetric matrix, such that +*> when UPLO = 'U' or 'u', the leading n by n upper triangular +*> part of the array A must contain the upper triangular part +*> of the symmetric matrix and the strictly lower triangular +*> part of A is not referenced, and when UPLO = 'L' or 'l', +*> the leading n by n lower triangular part of the array A +*> must contain the lower triangular part of the symmetric +*> matrix and the strictly upper triangular part of A is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When SIDE = 'L' or 'l' then +*> LDA must be at least max( 1, m ), otherwise LDA must be at +*> least max( 1, n ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array of DIMENSION ( LDB, n ). +*> Before entry, the leading m by n part of the array B must +*> contain the matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION. +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then C need not be set on input. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> Before entry, the leading m by n part of the array C must +*> contain the matrix C, except when beta is zero, in which +*> case C need not be set on entry. +*> On exit, the array C is overwritten by the m by n updated +*> matrix. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, m ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER LDA,LDB,LDC,M,N + CHARACTER SIDE,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP1,TEMP2 + INTEGER I,INFO,J,K,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Set NROWA as the number of rows of A. +* + IF (LSAME(SIDE,'L')) THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME(UPLO,'U') +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF (M.LT.0) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 9 + ELSE IF (LDC.LT.MAX(1,M)) THEN + INFO = 12 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DSYMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,M + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(SIDE,'L')) THEN +* +* Form C := alpha*A*B + beta*C. +* + IF (UPPER) THEN + DO 70 J = 1,N + DO 60 I = 1,M + TEMP1 = ALPHA*B(I,J) + TEMP2 = ZERO + DO 50 K = 1,I - 1 + C(K,J) = C(K,J) + TEMP1*A(K,I) + TEMP2 = TEMP2 + B(K,J)*A(K,I) + 50 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) + + + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100 J = 1,N + DO 90 I = M,1,-1 + TEMP1 = ALPHA*B(I,J) + TEMP2 = ZERO + DO 80 K = I + 1,M + C(K,J) = C(K,J) + TEMP1*A(K,I) + TEMP2 = TEMP2 + B(K,J)*A(K,I) + 80 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) + + + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170 J = 1,N + TEMP1 = ALPHA*A(J,J) + IF (BETA.EQ.ZERO) THEN + DO 110 I = 1,M + C(I,J) = TEMP1*B(I,J) + 110 CONTINUE + ELSE + DO 120 I = 1,M + C(I,J) = BETA*C(I,J) + TEMP1*B(I,J) + 120 CONTINUE + END IF + DO 140 K = 1,J - 1 + IF (UPPER) THEN + TEMP1 = ALPHA*A(K,J) + ELSE + TEMP1 = ALPHA*A(J,K) + END IF + DO 130 I = 1,M + C(I,J) = C(I,J) + TEMP1*B(I,K) + 130 CONTINUE + 140 CONTINUE + DO 160 K = J + 1,N + IF (UPPER) THEN + TEMP1 = ALPHA*A(J,K) + ELSE + TEMP1 = ALPHA*A(K,J) + END IF + DO 150 I = 1,M + C(I,J) = C(I,J) + TEMP1*B(I,K) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of DSYMM . +* + END diff --git a/lib/linalg/dsymv.f b/lib/linalg/dsymv.f new file mode 100644 index 0000000000..5522023834 --- /dev/null +++ b/lib/linalg/dsymv.f @@ -0,0 +1,333 @@ +*> \brief \b DSYMV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA,BETA +* INTEGER INCX,INCY,LDA,N +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the n +*> element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION. +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> The vector and matrix arguments are not referenced when N = 0, or M = 0 +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 5 + ELSE IF (INCX.EQ.0) THEN + INFO = 7 + ELSE IF (INCY.EQ.0) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DSYMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* Set up the start points in X and Y. +* + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,N + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,N + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,N + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,N + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(UPLO,'U')) THEN +* +* Form y when A is stored in upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + DO 50 I = 1,J - 1 + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(I) + 50 CONTINUE + Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1,J - 1 + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(IX) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 100 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + Y(J) = Y(J) + TEMP1*A(J,J) + DO 90 I = J + 1,N + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(I) + 90 CONTINUE + Y(J) = Y(J) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + Y(JY) = Y(JY) + TEMP1*A(J,J) + IX = JX + IY = JY + DO 110 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(IX) + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYMV . +* + END diff --git a/lib/linalg/dsyr2.f b/lib/linalg/dsyr2.f new file mode 100644 index 0000000000..05e148105c --- /dev/null +++ b/lib/linalg/dsyr2.f @@ -0,0 +1,298 @@ +*> \brief \b DSYR2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA +* INTEGER INCX,INCY,LDA,N +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYR2 performs the symmetric rank 2 operation +*> +*> A := alpha*x*y**T + alpha*y*x**T + A, +*> +*> where alpha is a scalar, x and y are n element vectors and A is an n +*> by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the n +*> element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. On exit, the +*> upper triangular part of the array A is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. On exit, the +*> lower triangular part of the array A is overwritten by the +*> lower triangular part of the updated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DSYR2 ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in the upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 20 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(J) + TEMP2 = ALPHA*X(J) + DO 10 I = 1,J + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(JY) + TEMP2 = ALPHA*X(JX) + IX = KX + IY = KY + DO 30 I = 1,J + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(J) + TEMP2 = ALPHA*X(J) + DO 50 I = J,N + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(JY) + TEMP2 = ALPHA*X(JX) + IX = JX + IY = JY + DO 70 I = J,N + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2 . +* + END diff --git a/lib/linalg/dsyr2k.f b/lib/linalg/dsyr2k.f new file mode 100644 index 0000000000..2dde293eae --- /dev/null +++ b/lib/linalg/dsyr2k.f @@ -0,0 +1,399 @@ +*> \brief \b DSYR2K +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA,BETA +* INTEGER K,LDA,LDB,LDC,N +* CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYR2K performs one of the symmetric rank 2k operations +*> +*> C := alpha*A*B**T + alpha*B*A**T + beta*C, +*> +*> or +*> +*> C := alpha*A**T*B + alpha*B**T*A + beta*C, +*> +*> where alpha and beta are scalars, C is an n by n symmetric matrix +*> and A and B are n by k matrices in the first case and k by n +*> matrices in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*B**T + alpha*B*A**T + +*> beta*C. +*> +*> TRANS = 'T' or 't' C := alpha*A**T*B + alpha*B**T*A + +*> beta*C. +*> +*> TRANS = 'C' or 'c' C := alpha*A**T*B + alpha*B**T*A + +*> beta*C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrices A and B, and on entry with +*> TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +*> of rows of the matrices A and B. K must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> k when TRANS = 'N' or 'n', and is n otherwise. +*> Before entry with TRANS = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +*> k when TRANS = 'N' or 'n', and is n otherwise. +*> Before entry with TRANS = 'N' or 'n', the leading n by k +*> part of the array B must contain the matrix B, otherwise +*> the leading k by n part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDB must be at least max( 1, n ), otherwise LDB must +*> be at least max( 1, k ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION. +*> On entry, BETA specifies the scalar beta. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array C must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of C is not referenced. On exit, the +*> upper triangular part of the array C is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array C must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of C is not referenced. On exit, the +*> lower triangular part of the array C is overwritten by the +*> lower triangular part of the updated matrix. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP1,TEMP2 + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Test the input parameters. +* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'T')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDB.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 12 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DSYR2K',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.ZERO) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 I = J,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*B**T + alpha*B*A**T + C. +* + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J + C(I,J) = BETA*C(I,J) + 100 CONTINUE + END IF + DO 120 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*B(J,L) + TEMP2 = ALPHA*A(J,L) + DO 110 I = 1,J + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE + END IF + DO 170 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*B(J,L) + TEMP2 = ALPHA*A(J,L) + DO 160 I = J,N + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A**T*B + alpha*B**T*A + C. +* + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1,K + TEMP1 = TEMP1 + A(L,I)*B(L,J) + TEMP2 = TEMP2 + B(L,I)*A(L,J) + 190 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1,K + TEMP1 = TEMP1 + A(L,I)*B(L,J) + TEMP2 = TEMP2 + B(L,I)*A(L,J) + 220 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2K. +* + END diff --git a/lib/linalg/dsyrk.f b/lib/linalg/dsyrk.f new file mode 100644 index 0000000000..d91c3369f6 --- /dev/null +++ b/lib/linalg/dsyrk.f @@ -0,0 +1,364 @@ +*> \brief \b DSYRK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA,BETA +* INTEGER K,LDA,LDC,N +* CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYRK performs one of the symmetric rank k operations +*> +*> C := alpha*A*A**T + beta*C, +*> +*> or +*> +*> C := alpha*A**T*A + beta*C, +*> +*> where alpha and beta are scalars, C is an n by n symmetric matrix +*> and A is an n by k matrix in the first case and a k by n matrix +*> in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*A**T + beta*C. +*> +*> TRANS = 'T' or 't' C := alpha*A**T*A + beta*C. +*> +*> TRANS = 'C' or 'c' C := alpha*A**T*A + beta*C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrix A, and on entry with +*> TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +*> of rows of the matrix A. K must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> k when TRANS = 'N' or 'n', and is n otherwise. +*> Before entry with TRANS = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION. +*> On entry, BETA specifies the scalar beta. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array C must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of C is not referenced. On exit, the +*> upper triangular part of the array C is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array C must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of C is not referenced. On exit, the +*> lower triangular part of the array C is overwritten by the +*> lower triangular part of the updated matrix. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER K,LDA,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Test the input parameters. +* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'T')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DSYRK ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.ZERO) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 I = J,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*A**T + beta*C. +* + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J + C(I,J) = BETA*C(I,J) + 100 CONTINUE + END IF + DO 120 L = 1,K + IF (A(J,L).NE.ZERO) THEN + TEMP = ALPHA*A(J,L) + DO 110 I = 1,J + C(I,J) = C(I,J) + TEMP*A(I,L) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE + END IF + DO 170 L = 1,K + IF (A(J,L).NE.ZERO) THEN + TEMP = ALPHA*A(J,L) + DO 160 I = J,N + C(I,J) = C(I,J) + TEMP*A(I,L) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A**T*A + beta*C. +* + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP = ZERO + DO 190 L = 1,K + TEMP = TEMP + A(L,I)*A(L,J) + 190 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP = ZERO + DO 220 L = 1,K + TEMP = TEMP + A(L,I)*A(L,J) + 220 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYRK . +* + END diff --git a/lib/linalg/dsytd2.f b/lib/linalg/dsytd2.f new file mode 100644 index 0000000000..a238f9ab3b --- /dev/null +++ b/lib/linalg/dsytd2.f @@ -0,0 +1,323 @@ +*> \brief \b DSYTD2 reduces a symmetric matrix to real symmetric tridiagonal form by an orthogonal similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +*> form T by an orthogonal similarity transformation: Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, + $ HALF = 1.0D0 / 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DSYMV, DSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(1:i-1,i+1) +* + CALL DLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL DSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL DAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL DSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of DSYTD2 +* + END diff --git a/lib/linalg/dsytrd.f b/lib/linalg/dsytrd.f new file mode 100644 index 0000000000..b268f4c1e4 --- /dev/null +++ b/lib/linalg/dsytrd.f @@ -0,0 +1,376 @@ +*> \brief \b DSYTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by an orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLATRD, DSYR2K, DSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'DSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'DSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**T - W*V**T +* + CALL DSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W**T - W*V**T +* + CALL DSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRD +* + END diff --git a/lib/linalg/iladlc.f b/lib/linalg/iladlc.f new file mode 100644 index 0000000000..b56387d320 --- /dev/null +++ b/lib/linalg/iladlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILADLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILADLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILADLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILADLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/lib/linalg/iladlr.f b/lib/linalg/iladlr.f new file mode 100644 index 0000000000..fe155af075 --- /dev/null +++ b/lib/linalg/iladlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILADLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILADLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILADLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILADLR = MAX( ILADLR, I ) + END DO + END IF + RETURN + END