diff --git a/lib/atc/Array.h b/lib/atc/Array.h index 5b0d60d9e9..e3e6044060 100644 --- a/lib/atc/Array.h +++ b/lib/atc/Array.h @@ -5,6 +5,7 @@ //#include #include #include +#include template class Array { diff --git a/lib/atc/Array2D.h b/lib/atc/Array2D.h index 6e93fc5d3e..e05c21857c 100644 --- a/lib/atc/Array2D.h +++ b/lib/atc/Array2D.h @@ -3,6 +3,7 @@ #include #include +#include template class Array2D { diff --git a/lib/linalg/Makefile.gfortran b/lib/linalg/Makefile.gfortran new file mode 100755 index 0000000000..3ec6cf79aa --- /dev/null +++ b/lib/linalg/Makefile.gfortran @@ -0,0 +1,62 @@ +# * +# *_________________________________________________________________________* +# * Minimal BLAS/LAPACK Library for ATC + +# To compile and link LAMMPS to the linalg library generated by this Makefile, +# try appending the following definitions to the standard definitions in +# whatever LAMMPS Makefile your are using. +# CCFLAGS = -I../../lib/linalg +# LINKFLAGS = -L../../lib/libalg +# USRLIB = -llinalg -lgfortran + +SHELL = /bin/sh + +# ------ FILES ------ + +SRC = dasum.f daxpy.f dcopy.f ddot.f dgecon.f dgemm.f dgemv.f dger.f dgetf2.f dgetrf.f dgetri.f dlabad.f dlamch.f dlacn2.f dlange.f dlassq.f dlaswp.f dlatrs.f drscl.f dscal.f dswap.f dtrmm.f dtrmv.f dtrsm.f dtrsv.f dtrti2.f dtrtri.f idamax.f ieeeck.f ilaenv.f iparmq.f lsame.f xerbla.f + + +FILES = $(SRC) Makefile.* + +# ------ DEFINITIONS ------ + +LIB = liblinalg.a +OBJ = $(SRC:.f=.o) + +# ------ SETTINGS ------ + +FC = gfortran +FFLAGS = -O3 -march=native -mpc64 \ + -ffast-math -funroll-loops -fstrict-aliasing -Wall -W -Wno-uninitialized -fno-second-underscore +FFLAGS0 = -O0 -march=native -mpc64 \ + -Wall -W -Wno-uninitialized -fno-second-underscore +ARCHIVE = ar +AR = ar +ARCHFLAG = -rcs +USRLIB = +SYSLIB = + +# ------ MAKE PROCEDURE ------ + +lib: $(OBJ) + $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) + +# ------ COMPILE RULES ------ + +%.o:%.F + $(F90) $(F90FLAGS) -c $< + +%.o:%.f + $(FC) $(FFLAGS) -c $< + +dlamch.o: dlamch.f + $(FC) $(FFLAGS0) -c $< + +# ------ CLEAN ------ + +clean: + -rm *.o *.mod *~ $(LIB) + +tar: + -tar -czvf ../linalg.tar.gz $(FILES) + diff --git a/lib/linalg/Makefile.mingw_cross b/lib/linalg/Makefile.mingw_cross new file mode 100755 index 0000000000..b767d98167 --- /dev/null +++ b/lib/linalg/Makefile.mingw_cross @@ -0,0 +1,62 @@ +# * +# *_________________________________________________________________________* +# * Minimal BLAS/LAPACK Library for ATC + +# To compile and link LAMMPS to the linalg library generated by this Makefile, +# try appending the following definitions to the standard definitions in +# whatever LAMMPS Makefile your are using. +# CCFLAGS = -I../../lib/linalg +# LINKFLAGS = -L../../lib/libalg +# USRLIB = -llinalg -lgfortran + +SHELL = /bin/sh + +# ------ FILES ------ + +SRC = dasum.f daxpy.f dcopy.f ddot.f dgecon.f dgemm.f dgemv.f dger.f dgetf2.f dgetrf.f dgetri.f dlabad.f dlamch.f dlacn2.f dlange.f dlassq.f dlaswp.f dlatrs.f drscl.f dscal.f dswap.f dtrmm.f dtrmv.f dtrsm.f dtrsv.f dtrti2.f dtrtri.f idamax.f ieeeck.f ilaenv.f iparmq.f lsame.f xerbla.f + + +FILES = $(SRC) Makefile + +# ------ DEFINITIONS ------ + +LIB = liblinalg.a +OBJ = $(SRC:.f=.o) + +# ------ SETTINGS ------ + +FC = i686-pc-mingw32-gfortran +FFLAGS = -O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ + -ffast-math -funroll-loops -fstrict-aliasing -Wall -W -Wno-uninitialized -fno-second-underscore +FFLAGS0 = -O0 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ + -Wall -W -Wno-uninitialized -fno-second-underscore +ARCHIVE = i686-pc-mingw32-ar +AR = i686-pc-mingw32-ar +ARCHFLAG = -rcs +USRLIB = +SYSLIB = + +# ------ MAKE PROCEDURE ------ + +lib: $(OBJ) + $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) + +# ------ COMPILE RULES ------ + +%.o:%.F + $(F90) $(F90FLAGS) -c $< + +%.o:%.f + $(FC) $(FFLAGS) -c $< + +dlamch.o: dlamch.f + $(FC) $(FFLAGS0) -c $< + +# ------ CLEAN ------ + +clean: + -rm *.o *.mod *~ $(LIB) + +tar: + -tar -cvf ../linalg.tar $(FILES) + diff --git a/lib/linalg/dasum.f b/lib/linalg/dasum.f new file mode 100644 index 0000000000..8705ecd9a1 --- /dev/null +++ b/lib/linalg/dasum.f @@ -0,0 +1,62 @@ + DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*) +* .. +* +* Purpose +* ======= +* +* DASUM takes the sum of the absolute values. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DTEMP + INTEGER I,M,MP1,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS,MOD +* .. + DASUM = 0.0d0 + DTEMP = 0.0d0 + IF (N.LE.0 .OR. INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + DTEMP = DTEMP + DABS(DX(I)) + 10 CONTINUE + DASUM = DTEMP + RETURN +* +* code for increment equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,6) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DTEMP = DTEMP + DABS(DX(I)) + 30 CONTINUE + IF (N.LT.6) GO TO 60 + 40 MP1 = M + 1 + DO 50 I = MP1,N,6 + DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) + DABS(DX(I+2)) + + + DABS(DX(I+3)) + DABS(DX(I+4)) + DABS(DX(I+5)) + 50 CONTINUE + 60 DASUM = DTEMP + RETURN + END diff --git a/lib/linalg/daxpy.f b/lib/linalg/daxpy.f new file mode 100644 index 0000000000..4f92db2fb9 --- /dev/null +++ b/lib/linalg/daxpy.f @@ -0,0 +1,67 @@ + SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) +* .. Scalar Arguments .. + DOUBLE PRECISION DA + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*),DY(*) +* .. +* +* Purpose +* ======= +* +* DAXPY constant times a vector plus a vector. +* uses unrolled loops for increments equal to one. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (DA.EQ.0.0d0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + DY(IY) = DY(IY) + DA*DX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,4) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DY(I) = DY(I) + DA*DX(I) + 30 CONTINUE + IF (N.LT.4) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,4 + DY(I) = DY(I) + DA*DX(I) + DY(I+1) = DY(I+1) + DA*DX(I+1) + DY(I+2) = DY(I+2) + DA*DX(I+2) + DY(I+3) = DY(I+3) + DA*DX(I+3) + 50 CONTINUE + RETURN + END diff --git a/lib/linalg/dcopy.f b/lib/linalg/dcopy.f new file mode 100644 index 0000000000..dcc8a0c175 --- /dev/null +++ b/lib/linalg/dcopy.f @@ -0,0 +1,68 @@ + SUBROUTINE DCOPY(N,DX,INCX,DY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*),DY(*) +* .. +* +* Purpose +* ======= +* +* DCOPY copies a vector, x, to a vector, y. +* uses unrolled loops for increments equal to one. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + DY(IY) = DX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,7) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DY(I) = DX(I) + 30 CONTINUE + IF (N.LT.7) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,7 + DY(I) = DX(I) + DY(I+1) = DX(I+1) + DY(I+2) = DX(I+2) + DY(I+3) = DX(I+3) + DY(I+4) = DX(I+4) + DY(I+5) = DX(I+5) + DY(I+6) = DX(I+6) + 50 CONTINUE + RETURN + END diff --git a/lib/linalg/ddot.f b/lib/linalg/ddot.f new file mode 100644 index 0000000000..3df03aecb1 --- /dev/null +++ b/lib/linalg/ddot.f @@ -0,0 +1,68 @@ + DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*),DY(*) +* .. +* +* Purpose +* ======= +* +* DDOT forms the dot product of two vectors. +* uses unrolled loops for increments equal to one. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DTEMP + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + DDOT = 0.0d0 + DTEMP = 0.0d0 + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + DTEMP = DTEMP + DX(IX)*DY(IY) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + DDOT = DTEMP + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,5) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DTEMP = DTEMP + DX(I)*DY(I) + 30 CONTINUE + IF (N.LT.5) GO TO 60 + 40 MP1 = M + 1 + DO 50 I = MP1,N,5 + DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) + + + DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4) + 50 CONTINUE + 60 DDOT = DTEMP + RETURN + END diff --git a/lib/linalg/dgecon.f b/lib/linalg/dgecon.f new file mode 100644 index 0000000000..b49a91f6f2 --- /dev/null +++ b/lib/linalg/dgecon.f @@ -0,0 +1,186 @@ + SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGECON estimates the reciprocal of the condition number of a general +* real matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by DGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by DGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L'). +* + CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DGECON +* + END diff --git a/lib/linalg/dgemm.f b/lib/linalg/dgemm.f new file mode 100644 index 0000000000..4afccb1615 --- /dev/null +++ b/lib/linalg/dgemm.f @@ -0,0 +1,316 @@ + SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER K,LDA,LDB,LDC,M,N + CHARACTER TRANSA,TRANSB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* Purpose +* ======= +* +* DGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X', +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Arguments +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = A'. +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = B'. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB + LOGICAL NOTA,NOTB +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA, NCOLA and NROWB as the number of rows +* and columns of A and the number of rows of B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + IF (NOTA) THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 2 + ELSE IF (M.LT.0) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,M)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DGEMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And if alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,M + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 50 I = 1,M + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = 1,M + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + IF (B(L,J).NE.ZERO) THEN + TEMP = ALPHA*B(L,J) + DO 70 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 120 J = 1,N + DO 110 I = 1,M + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF (NOTA) THEN +* +* Form C := alpha*A*B' + beta*C +* + DO 170 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 130 I = 1,M + C(I,J) = ZERO + 130 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 140 I = 1,M + C(I,J) = BETA*C(I,J) + 140 CONTINUE + END IF + DO 160 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*B(J,L) + DO 150 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 200 J = 1,N + DO 190 I = 1,M + TEMP = ZERO + DO 180 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 180 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMM . +* + END diff --git a/lib/linalg/dgemv.f b/lib/linalg/dgemv.f new file mode 100644 index 0000000000..59e11b8826 --- /dev/null +++ b/lib/linalg/dgemv.f @@ -0,0 +1,264 @@ + SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER INCX,INCY,LDA,M,N + CHARACTER TRANS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* DGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Arguments +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 1 + ELSE IF (M.LT.0) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + ELSE IF (INCY.EQ.0) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DGEMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF (LSAME(TRANS,'N')) THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (LENX-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (LENY-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,LENY + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,LENY + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,LENY + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,LENY + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(TRANS,'N')) THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF (INCY.EQ.1) THEN + DO 60 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + DO 50 I = 1,M + Y(I) = Y(I) + TEMP*A(I,J) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IY = KY + DO 70 I = 1,M + Y(IY) = Y(IY) + TEMP*A(I,J) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF (INCX.EQ.1) THEN + DO 100 J = 1,N + TEMP = ZERO + DO 90 I = 1,M + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120 J = 1,N + TEMP = ZERO + IX = KX + DO 110 I = 1,M + TEMP = TEMP + A(I,J)*X(IX) + IX = IX + INCX + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMV . +* + END diff --git a/lib/linalg/dger.f b/lib/linalg/dger.f new file mode 100644 index 0000000000..0a9dcf2afd --- /dev/null +++ b/lib/linalg/dger.f @@ -0,0 +1,162 @@ + SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* DGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Arguments +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,IX,J,JY,KX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (M.LT.0) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DGER ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (INCY.GT.0) THEN + JY = 1 + ELSE + JY = 1 - (N-1)*INCY + END IF + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + DO 10 I = 1,M + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (M-1)*INCX + END IF + DO 40 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + IX = KX + DO 30 I = 1,M + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of DGER . +* + END diff --git a/lib/linalg/dgetf2.f b/lib/linalg/dgetf2.f new file mode 100644 index 0000000000..a4277c1651 --- /dev/null +++ b/lib/linalg/dgetf2.f @@ -0,0 +1,148 @@ + SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IDAMAX + EXTERNAL DLAMCH, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of DGETF2 +* + END diff --git a/lib/linalg/dgetrf.f b/lib/linalg/dgetrf.f new file mode 100644 index 0000000000..3f7aebee98 --- /dev/null +++ b/lib/linalg/dgetrf.f @@ -0,0 +1,160 @@ + SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL DGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGETRF +* + END diff --git a/lib/linalg/dgetri.f b/lib/linalg/dgetri.f new file mode 100644 index 0000000000..09367b64e1 --- /dev/null +++ b/lib/linalg/dgetri.f @@ -0,0 +1,193 @@ + SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGETRI computes the inverse of a matrix using the LU factorization +* computed by DGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by DGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of DGETRI +* + END diff --git a/lib/linalg/dlabad.f b/lib/linalg/dlabad.f new file mode 100644 index 0000000000..8eb5ce4fcf --- /dev/null +++ b/lib/linalg/dlabad.f @@ -0,0 +1,56 @@ + SUBROUTINE DLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + DOUBLE PRECISION LARGE, SMALL +* .. +* +* Purpose +* ======= +* +* DLABAD takes as input the values computed by DLAMCH for underflow and +* overflow, and returns the square root of each of these values if the +* log of LARGE is sufficiently large. This subroutine is intended to +* identify machines with a large exponent range, such as the Crays, and +* redefine the underflow and overflow limits to be the square roots of +* the values computed by DLAMCH. This subroutine is needed because +* DLAMCH does not compensate for poor arithmetic in the upper half of +* the exponent range, as is found on a Cray. +* +* Arguments +* ========= +* +* SMALL (input/output) DOUBLE PRECISION +* On entry, the underflow threshold as computed by DLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of SMALL, otherwise unchanged. +* +* LARGE (input/output) DOUBLE PRECISION +* On entry, the overflow threshold as computed by DLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of LARGE, otherwise unchanged. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000.D0 ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of DLABAD +* + END diff --git a/lib/linalg/dlacn2.f b/lib/linalg/dlacn2.f new file mode 100644 index 0000000000..26b8f3c75b --- /dev/null +++ b/lib/linalg/dlacn2.f @@ -0,0 +1,215 @@ + SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ), ISAVE( 3 ) + DOUBLE PRECISION V( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLACN2 estimates the 1-norm of a square, real matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) DOUBLE PRECISION array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* and DLACN2 must be re-called with all the other parameters +* unchanged. +* +* ISGN (workspace) INTEGER array, dimension (N) +* +* EST (input/output) DOUBLE PRECISION +* On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +* unchanged from the previous call to DLACN2. +* On exit, EST is an estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to DLACN2, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from DLACN2, KASE will again be 0. +* +* ISAVE (input/output) INTEGER array, dimension (3) +* ISAVE is used to save variables between calls to DLACN2 +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named SONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* This is a thread safe version of DLACON, which uses the array ISAVE +* in place of a SAVE statement, as follows: +* +* DLACON DLACN2 +* JUMP ISAVE(1) +* J ISAVE(2) +* ITER ISAVE(3) +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + DOUBLE PRECISION ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM + EXTERNAL IDAMAX, DASUM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, NINT, SIGN +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / DBLE( N ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = DASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = IDAMAX( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = ONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL DCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = IDAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL DCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of DLACN2 +* + END diff --git a/lib/linalg/dlamch.f b/lib/linalg/dlamch.f new file mode 100644 index 0000000000..8f830e87cd --- /dev/null +++ b/lib/linalg/dlamch.f @@ -0,0 +1,852 @@ + DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* Purpose +* ======= +* +* DLAMCH determines double precision machine parameters. +* +* Arguments +* ========= +* +* CMACH (input) CHARACTER*1 +* Specifies the value to be returned by DLAMCH: +* = 'E' or 'e', DLAMCH := eps +* = 'S' or 's , DLAMCH := sfmin +* = 'B' or 'b', DLAMCH := base +* = 'P' or 'p', DLAMCH := eps*base +* = 'N' or 'n', DLAMCH := t +* = 'R' or 'r', DLAMCH := rnd +* = 'M' or 'm', DLAMCH := emin +* = 'U' or 'u', DLAMCH := rmin +* = 'L' or 'l', DLAMCH := emax +* = 'O' or 'o', DLAMCH := rmax +* +* where +* +* eps = relative machine precision +* sfmin = safe minimum, such that 1/sfmin does not overflow +* base = base of the machine +* prec = eps*base +* t = number of (base) digits in the mantissa +* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +* emin = minimum exponent before (gradual) underflow +* rmin = underflow threshold - base**(emin-1) +* emax = largest exponent before overflow +* rmax = overflow threshold - (base**emax)*(1-eps) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + DOUBLE PRECISION BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + DLAMCH = RMACH + FIRST = .FALSE. + RETURN +* +* End of DLAMCH +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. +* +* Purpose +* ======= +* +* DLAMC1 determines the machine parameters given by BETA, T, RND, and +* IEEE1. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* IEEE1 (output) LOGICAL +* Specifies whether rounding appears to be done in the IEEE +* 'round to nearest' style. +* +* Further Details +* =============== +* +* The routine is based on the routine ENVRON by Malcolm and +* incorporates suggestions by Gentleman and Marovich. See +* +* Malcolm M. A. (1972) Algorithms to reveal properties of +* floating-point arithmetic. Comms. of the ACM, 15, 949-951. +* +* Gentleman W. M. and Marovich S. B. (1974) More on algorithms +* that reveal properties of floating point arithmetic units. +* Comms. of the ACM, 17, 276-277. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + DOUBLE PRECISION A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. +* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = DLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = DLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = DLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = DLAMC3( B / 2, -B / 100 ) + C = DLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = DLAMC3( B / 2, B / 100 ) + C = DLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = DLAMC3( B / 2, A ) + T2 = DLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. +* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + FIRST = .FALSE. + RETURN +* +* End of DLAMC1 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + DOUBLE PRECISION EPS, RMAX, RMIN +* .. +* +* Purpose +* ======= +* +* DLAMC2 determines the machine parameters specified in its argument +* list. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* EPS (output) DOUBLE PRECISION +* The smallest positive number such that +* +* fl( 1.0 - EPS ) .LT. 1.0, +* +* where fl denotes the computed value. +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow occurs. +* +* RMIN (output) DOUBLE PRECISION +* The smallest normalized number for the machine, given by +* BASE**( EMIN - 1 ), where BASE is the floating point value +* of BETA. +* +* EMAX (output) INTEGER +* The maximum exponent before overflow occurs. +* +* RMAX (output) DOUBLE PRECISION +* The largest positive number for the machine, given by +* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +* value of BETA. +* +* Further Details +* =============== +* +* The computation of EPS is based on a routine PARANOIA by +* W. Kahan of the University of California at Berkeley. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + DOUBLE PRECISION A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL DLAMC1, DLAMC4, DLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL DLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = DLAMC3( B, -HALF ) + THIRD = DLAMC3( SIXTH, SIXTH ) + B = DLAMC3( THIRD, -HALF ) + B = DLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN + LEPS = B + C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = DLAMC3( HALF, -C ) + B = DLAMC3( HALF, C ) + C = DLAMC3( HALF, -B ) + B = DLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = DLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = DLAMC3( ONE, SMALL ) + CALL DLAMC4( NGPMIN, ONE, LBETA ) + CALL DLAMC4( NGNMIN, -ONE, LBETA ) + CALL DLAMC4( GPMIN, A, LBETA ) + CALL DLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF + FIRST = .FALSE. +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine DLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = DLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call DLAMC5 to compute EMAX and RMAX. +* + CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of DLAMC2 +* + END +* +************************************************************************ +* + DOUBLE PRECISION FUNCTION DLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B +* .. +* +* Purpose +* ======= +* +* DLAMC3 is intended to force A and B to be stored prior to doing +* the addition of A and B , for use in situations where optimizers +* might hold one of these in a register. +* +* Arguments +* ========= +* +* A (input) DOUBLE PRECISION +* B (input) DOUBLE PRECISION +* The values A and B. +* +* ===================================================================== +* +* .. Executable Statements .. +* + DLAMC3 = A + B +* + RETURN +* +* End of DLAMC3 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER BASE, EMIN + DOUBLE PRECISION START +* .. +* +* Purpose +* ======= +* +* DLAMC4 is a service routine for DLAMC2. +* +* Arguments +* ========= +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow, computed by +* setting A = START and dividing by BASE until the previous A +* can not be recovered. +* +* START (input) DOUBLE PRECISION +* The starting point for determining EMIN. +* +* BASE (input) INTEGER +* The base of the machine. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = DLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. + $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = DLAMC3( A / BASE, ZERO ) + C1 = DLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = DLAMC3( A*RBASE, ZERO ) + C2 = DLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of DLAMC4 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + DOUBLE PRECISION RMAX +* .. +* +* Purpose +* ======= +* +* DLAMC5 attempts to compute RMAX, the largest machine floating-point +* number, without overflow. It assumes that EMAX + abs(EMIN) sum +* approximately to a power of 2. It will fail on machines where this +* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +* EMAX = 28718). It will also fail if the value supplied for EMIN is +* too large (i.e. too close to zero), probably with overflow. +* +* Arguments +* ========= +* +* BETA (input) INTEGER +* The base of floating-point arithmetic. +* +* P (input) INTEGER +* The number of base BETA digits in the mantissa of a +* floating-point value. +* +* EMIN (input) INTEGER +* The minimum exponent before (gradual) underflow. +* +* IEEE (input) LOGICAL +* A logical flag specifying whether or not the arithmetic +* system is thought to comply with the IEEE standard. +* +* EMAX (output) INTEGER +* The largest exponent before overflow +* +* RMAX (output) DOUBLE PRECISION +* The largest machine floating-point number. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + DOUBLE PRECISION OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. +* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = DLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = DLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of DLAMC5 +* + END diff --git a/lib/linalg/dlange.f b/lib/linalg/dlange.f new file mode 100644 index 0000000000..471c2371c7 --- /dev/null +++ b/lib/linalg/dlange.f @@ -0,0 +1,145 @@ + DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real matrix A. +* +* Description +* =========== +* +* DLANGE returns the value +* +* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* DLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* DLANGE is set to zero. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANGE = VALUE + RETURN +* +* End of DLANGE +* + END diff --git a/lib/linalg/dlassq.f b/lib/linalg/dlassq.f new file mode 100644 index 0000000000..51cce918df --- /dev/null +++ b/lib/linalg/dlassq.f @@ -0,0 +1,89 @@ + SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* Purpose +* ======= +* +* DLASSQ returns the values scl and smsq such that +* +* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +* assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( x( i ) ) ). +* +* scale and sumsq must be supplied in SCALE and SUMSQ and +* scl and smsq are overwritten on SCALE and SUMSQ respectively. +* +* The routine makes only one pass through the vector x. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) DOUBLE PRECISION array, dimension (N) +* The vector for which a scaled sum of squares is computed. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) DOUBLE PRECISION +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with scl , the scaling factor +* for the sum of squares. +* +* SUMSQ (input/output) DOUBLE PRECISION +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with smsq , the basic sum of +* squares from which scl has been factored out. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION ABSXI +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( X( IX ).NE.ZERO ) THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of DLASSQ +* + END diff --git a/lib/linalg/dlaswp.f b/lib/linalg/dlaswp.f new file mode 100644 index 0000000000..5239ba1f5f --- /dev/null +++ b/lib/linalg/dlaswp.f @@ -0,0 +1,120 @@ + SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (K2*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of DLASWP +* + END diff --git a/lib/linalg/dlatrs.f b/lib/linalg/dlatrs.f new file mode 100644 index 0000000000..de70c1f565 --- /dev/null +++ b/lib/linalg/dlatrs.f @@ -0,0 +1,702 @@ + SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLATRS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A' denotes the transpose of A, x and b are +* n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, DTRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = DASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = DASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTRSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL DAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL DAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = DDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = DDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATRS +* + END diff --git a/lib/linalg/drscl.f b/lib/linalg/drscl.f new file mode 100644 index 0000000000..5a9ed00812 --- /dev/null +++ b/lib/linalg/drscl.f @@ -0,0 +1,115 @@ + SUBROUTINE DRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SA +* .. +* .. Array Arguments .. + DOUBLE PRECISION SX( * ) +* .. +* +* Purpose +* ======= +* +* DRSCL multiplies an n-element real vector x by the real scalar 1/a. +* This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) DOUBLE PRECISION +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) DOUBLE PRECISION array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL DSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DRSCL +* + END diff --git a/lib/linalg/dscal.f b/lib/linalg/dscal.f new file mode 100644 index 0000000000..81425a9474 --- /dev/null +++ b/lib/linalg/dscal.f @@ -0,0 +1,62 @@ + SUBROUTINE DSCAL(N,DA,DX,INCX) +* .. Scalar Arguments .. + DOUBLE PRECISION DA + INTEGER INCX,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*) +* .. +* +* Purpose +* ======= +* +* DSCAL scales a vector by a constant. +* uses unrolled loops for increment equal to one. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I,M,MP1,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0 .OR. INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + DX(I) = DA*DX(I) + 10 CONTINUE + RETURN +* +* code for increment equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,5) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DX(I) = DA*DX(I) + 30 CONTINUE + IF (N.LT.5) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,5 + DX(I) = DA*DX(I) + DX(I+1) = DA*DX(I+1) + DX(I+2) = DA*DX(I+2) + DX(I+3) = DA*DX(I+3) + DX(I+4) = DA*DX(I+4) + 50 CONTINUE + RETURN + END diff --git a/lib/linalg/dswap.f b/lib/linalg/dswap.f new file mode 100644 index 0000000000..3595fd8c10 --- /dev/null +++ b/lib/linalg/dswap.f @@ -0,0 +1,75 @@ + SUBROUTINE DSWAP(N,DX,INCX,DY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*),DY(*) +* .. +* +* Purpose +* ======= +* +* interchanges two vectors. +* uses unrolled loops for increments equal one. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DTEMP + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + DTEMP = DX(IX) + DX(IX) = DY(IY) + DY(IY) = DTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,3) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + DTEMP = DX(I) + DX(I) = DY(I) + DY(I) = DTEMP + 30 CONTINUE + IF (N.LT.3) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,3 + DTEMP = DX(I) + DX(I) = DY(I) + DY(I) = DTEMP + DTEMP = DX(I+1) + DX(I+1) = DY(I+1) + DY(I+1) = DTEMP + DTEMP = DX(I+2) + DX(I+2) = DY(I+2) + DY(I+2) = DTEMP + 50 CONTINUE + RETURN + END diff --git a/lib/linalg/dtrmm.f b/lib/linalg/dtrmm.f new file mode 100644 index 0000000000..931748dbeb --- /dev/null +++ b/lib/linalg/dtrmm.f @@ -0,0 +1,349 @@ + SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*) +* .. +* +* Purpose +* ======= +* +* DTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Arguments +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOUNIT,UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Test the input parameters. +* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DTRMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*A*B. +* + IF (UPPER) THEN + DO 50 J = 1,N + DO 40 K = 1,M + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + DO 30 I = 1,K - 1 + B(I,J) = B(I,J) + TEMP*A(I,K) + 30 CONTINUE + IF (NOUNIT) TEMP = TEMP*A(K,K) + B(K,J) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + B(K,J) = TEMP + IF (NOUNIT) B(K,J) = B(K,J)*A(K,K) + DO 60 I = K + 1,M + B(I,J) = B(I,J) + TEMP*A(I,K) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF (UPPER) THEN + DO 110 J = 1,N + DO 100 I = M,1,-1 + TEMP = B(I,J) + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 90 K = 1,I - 1 + TEMP = TEMP + A(K,I)*B(K,J) + 90 CONTINUE + B(I,J) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140 J = 1,N + DO 130 I = 1,M + TEMP = B(I,J) + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 120 K = I + 1,M + TEMP = TEMP + A(K,I)*B(K,J) + 120 CONTINUE + B(I,J) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*A. +* + IF (UPPER) THEN + DO 180 J = N,1,-1 + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = 1,M + B(I,J) = TEMP*B(I,J) + 150 CONTINUE + DO 170 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 160 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220 J = 1,N + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 190 I = 1,M + B(I,J) = TEMP*B(I,J) + 190 CONTINUE + DO 210 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 200 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF (UPPER) THEN + DO 260 K = 1,N + DO 240 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + TEMP = ALPHA*A(J,K) + DO 230 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(K,K) + IF (TEMP.NE.ONE) THEN + DO 250 I = 1,M + B(I,K) = TEMP*B(I,K) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300 K = N,1,-1 + DO 280 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + TEMP = ALPHA*A(J,K) + DO 270 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(K,K) + IF (TEMP.NE.ONE) THEN + DO 290 I = 1,M + B(I,K) = TEMP*B(I,K) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMM . +* + END diff --git a/lib/linalg/dtrmv.f b/lib/linalg/dtrmv.f new file mode 100644 index 0000000000..b1ca93a452 --- /dev/null +++ b/lib/linalg/dtrmv.f @@ -0,0 +1,281 @@ + SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* DTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DTRMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := A*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 10 I = 1,J - 1 + X(I) = X(I) + TEMP*A(I,J) + 10 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 30 I = 1,J - 1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX + INCX + 30 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 50 I = N,J + 1,-1 + X(I) = X(I) + TEMP*A(I,J) + 50 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 60 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 80 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 70 I = N,J + 1,-1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX - INCX + 70 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 100 J = N,1,-1 + TEMP = X(J) + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 90 I = J - 1,1,-1 + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + X(J) = TEMP + 100 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 120 J = N,1,-1 + TEMP = X(JX) + IX = JX + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 110 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + A(I,J)*X(IX) + 110 CONTINUE + X(JX) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 140 J = 1,N + TEMP = X(J) + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 130 I = J + 1,N + TEMP = TEMP + A(I,J)*X(I) + 130 CONTINUE + X(J) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160 J = 1,N + TEMP = X(JX) + IX = JX + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + A(I,J)*X(IX) + 150 CONTINUE + X(JX) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMV . +* + END diff --git a/lib/linalg/dtrsm.f b/lib/linalg/dtrsm.f new file mode 100644 index 0000000000..888111042d --- /dev/null +++ b/lib/linalg/dtrsm.f @@ -0,0 +1,376 @@ + SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*) +* .. +* +* Purpose +* ======= +* +* DTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* The matrix X is overwritten on B. +* +* Arguments +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* Further Details +* =============== +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOUNIT,UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Test the input parameters. +* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DTRSM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*inv( A )*B. +* + IF (UPPER) THEN + DO 60 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 30 I = 1,M + B(I,J) = ALPHA*B(I,J) + 30 CONTINUE + END IF + DO 50 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 40 I = 1,K - 1 + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 70 I = 1,M + B(I,J) = ALPHA*B(I,J) + 70 CONTINUE + END IF + DO 90 K = 1,M + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 80 I = K + 1,M + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B. +* + IF (UPPER) THEN + DO 130 J = 1,N + DO 120 I = 1,M + TEMP = ALPHA*B(I,J) + DO 110 K = 1,I - 1 + TEMP = TEMP - A(K,I)*B(K,J) + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + B(I,J) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160 J = 1,N + DO 150 I = M,1,-1 + TEMP = ALPHA*B(I,J) + DO 140 K = I + 1,M + TEMP = TEMP - A(K,I)*B(K,J) + 140 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + B(I,J) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*inv( A ). +* + IF (UPPER) THEN + DO 210 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 170 I = 1,M + B(I,J) = ALPHA*B(I,J) + 170 CONTINUE + END IF + DO 190 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + DO 180 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 180 CONTINUE + END IF + 190 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 200 I = 1,M + B(I,J) = TEMP*B(I,J) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260 J = N,1,-1 + IF (ALPHA.NE.ONE) THEN + DO 220 I = 1,M + B(I,J) = ALPHA*B(I,J) + 220 CONTINUE + END IF + DO 240 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + DO 230 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 230 CONTINUE + END IF + 240 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 250 I = 1,M + B(I,J) = TEMP*B(I,J) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ). +* + IF (UPPER) THEN + DO 310 K = N,1,-1 + IF (NOUNIT) THEN + TEMP = ONE/A(K,K) + DO 270 I = 1,M + B(I,K) = TEMP*B(I,K) + 270 CONTINUE + END IF + DO 290 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + TEMP = A(J,K) + DO 280 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 280 CONTINUE + END IF + 290 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 300 I = 1,M + B(I,K) = ALPHA*B(I,K) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360 K = 1,N + IF (NOUNIT) THEN + TEMP = ONE/A(K,K) + DO 320 I = 1,M + B(I,K) = TEMP*B(I,K) + 320 CONTINUE + END IF + DO 340 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + TEMP = A(J,K) + DO 330 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 330 CONTINUE + END IF + 340 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 350 I = 1,M + B(I,K) = ALPHA*B(I,K) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSM . +* + END diff --git a/lib/linalg/dtrsv.f b/lib/linalg/dtrsv.f new file mode 100644 index 0000000000..847fd3917d --- /dev/null +++ b/lib/linalg/dtrsv.f @@ -0,0 +1,282 @@ + SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* DTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DTRSV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := inv( A )*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 10 I = J - 1,1,-1 + X(I) = X(I) - TEMP*A(I,J) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 40 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 30 I = J - 1,1,-1 + IX = IX - INCX + X(IX) = X(IX) - TEMP*A(I,J) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 50 I = J + 1,N + X(I) = X(I) - TEMP*A(I,J) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 70 I = J + 1,N + IX = IX + INCX + X(IX) = X(IX) - TEMP*A(I,J) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 100 J = 1,N + TEMP = X(J) + DO 90 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(I) + 90 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(J) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120 J = 1,N + TEMP = X(JX) + IX = KX + DO 110 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX + INCX + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(JX) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 140 J = N,1,-1 + TEMP = X(J) + DO 130 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(I) + 130 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(J) = TEMP + 140 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 160 J = N,1,-1 + TEMP = X(JX) + IX = KX + DO 150 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX - INCX + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(JX) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSV . +* + END diff --git a/lib/linalg/dtrti2.f b/lib/linalg/dtrti2.f new file mode 100644 index 0000000000..2091b693dd --- /dev/null +++ b/lib/linalg/dtrti2.f @@ -0,0 +1,147 @@ + SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DTRTI2 computes the inverse of a real upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL DTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL DSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL DTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL DSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DTRTI2 +* + END diff --git a/lib/linalg/dtrtri.f b/lib/linalg/dtrtri.f new file mode 100644 index 0000000000..6a04cdf1f7 --- /dev/null +++ b/lib/linalg/dtrtri.f @@ -0,0 +1,177 @@ + SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DTRTRI computes the inverse of a real upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of DTRTRI +* + END diff --git a/lib/linalg/idamax.f b/lib/linalg/idamax.f new file mode 100644 index 0000000000..fa45c1d69c --- /dev/null +++ b/lib/linalg/idamax.f @@ -0,0 +1,58 @@ + INTEGER FUNCTION IDAMAX(N,DX,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + DOUBLE PRECISION DX(*) +* .. +* +* Purpose +* ======= +* +* IDAMAX finds the index of element having max. absolute value. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DMAX + INTEGER I,IX +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS +* .. + IDAMAX = 0 + IF (N.LT.1 .OR. INCX.LE.0) RETURN + IDAMAX = 1 + IF (N.EQ.1) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + IX = 1 + DMAX = DABS(DX(1)) + IX = IX + INCX + DO 10 I = 2,N + IF (DABS(DX(IX)).LE.DMAX) GO TO 5 + IDAMAX = I + DMAX = DABS(DX(IX)) + 5 IX = IX + INCX + 10 CONTINUE + RETURN +* +* code for increment equal to 1 +* + 20 DMAX = DABS(DX(1)) + DO 30 I = 2,N + IF (DABS(DX(I)).LE.DMAX) GO TO 30 + IDAMAX = I + DMAX = DABS(DX(I)) + 30 CONTINUE + RETURN + END diff --git a/lib/linalg/ieeeck.f b/lib/linalg/ieeeck.f new file mode 100644 index 0000000000..b2c1d4909f --- /dev/null +++ b/lib/linalg/ieeeck.f @@ -0,0 +1,148 @@ + INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ISPEC + REAL ONE, ZERO +* .. +* +* Purpose +* ======= +* +* IEEECK is called from the ILAENV to verify that Infinity and +* possibly NaN arithmetic is safe (i.e. will not trap). +* +* Arguments +* ========= +* +* ISPEC (input) INTEGER +* Specifies whether to test just for inifinity arithmetic +* or whether to test for infinity and NaN arithmetic. +* = 0: Verify infinity arithmetic only. +* = 1: Verify infinity and NaN arithmetic. +* +* ZERO (input) REAL +* Must contain the value 0.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* ONE (input) REAL +* Must contain the value 1.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* RETURN VALUE: INTEGER +* = 0: Arithmetic failed to produce the correct answers +* = 1: Arithmetic produced the correct answers +* +* .. Local Scalars .. + REAL NAN1, NAN2, NAN3, NAN4, NAN5, NAN6, NEGINF, + $ NEGZRO, NEWZRO, POSINF +* .. +* .. Executable Statements .. + IEEECK = 1 +* + POSINF = ONE / ZERO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = -ONE / ZERO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGZRO = ONE / ( NEGINF+ONE ) + IF( NEGZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = ONE / NEGZRO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEWZRO = NEGZRO + ZERO + IF( NEWZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = ONE / NEWZRO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = NEGINF*POSINF + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = POSINF*POSINF + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* +* +* +* +* Return if we were only asked to check infinity arithmetic +* + IF( ISPEC.EQ.0 ) + $ RETURN +* + NAN1 = POSINF + NEGINF +* + NAN2 = POSINF / NEGINF +* + NAN3 = POSINF / POSINF +* + NAN4 = POSINF*ZERO +* + NAN5 = NEGINF*NEGZRO +* + NAN6 = NAN5*0.0 +* + IF( NAN1.EQ.NAN1 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN2.EQ.NAN2 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN3.EQ.NAN3 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN4.EQ.NAN4 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN5.EQ.NAN5 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN6.EQ.NAN6 ) THEN + IEEECK = 0 + RETURN + END IF +* + RETURN + END diff --git a/lib/linalg/ilaenv.f b/lib/linalg/ilaenv.f new file mode 100644 index 0000000000..8ed8aed657 --- /dev/null +++ b/lib/linalg/ilaenv.f @@ -0,0 +1,555 @@ + INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* -- LAPACK auxiliary routine (version 3.2.1) -- +* +* -- April 2009 -- +* +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* Purpose +* ======= +* +* ILAENV is called from the LAPACK routines to choose problem-dependent +* parameters for the local environment. See ISPEC for a description of +* the parameters. +* +* ILAENV returns an INTEGER +* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC +* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. +* +* This version provides a set of parameters which should give good, +* but not optimal, performance on many of the currently available +* computers. Users are encouraged to modify this subroutine to set +* the tuning parameters for their particular machine using the option +* and problem size information in the arguments. +* +* This routine will not function correctly if it is converted to all +* lower case. Converting it to all upper case is allowed. +* +* Arguments +* ========= +* +* ISPEC (input) INTEGER +* Specifies the parameter to be returned as the value of +* ILAENV. +* = 1: the optimal blocksize; if this value is 1, an unblocked +* algorithm will give the best performance. +* = 2: the minimum block size for which the block routine +* should be used; if the usable block size is less than +* this value, an unblocked routine should be used. +* = 3: the crossover point (in a block routine, for N less +* than this value, an unblocked routine should be used) +* = 4: the number of shifts, used in the nonsymmetric +* eigenvalue routines (DEPRECATED) +* = 5: the minimum column dimension for blocking to be used; +* rectangular blocks must have dimension at least k by m, +* where k is given by ILAENV(2,...) and m by ILAENV(5,...) +* = 6: the crossover point for the SVD (when reducing an m by n +* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds +* this value, a QR factorization is used first to reduce +* the matrix to a triangular form.) +* = 7: the number of processors +* = 8: the crossover point for the multishift QR method +* for nonsymmetric eigenvalue problems (DEPRECATED) +* = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* =10: ieee NaN arithmetic can be trusted not to trap +* =11: infinity arithmetic can be trusted not to trap +* 12 <= ISPEC <= 16: +* xHSEQR or one of its subroutines, +* see IPARMQ for detailed explanation +* +* NAME (input) CHARACTER*(*) +* The name of the calling subroutine, in either upper case or +* lower case. +* +* OPTS (input) CHARACTER*(*) +* The character options to the subroutine NAME, concatenated +* into a single character string. For example, UPLO = 'U', +* TRANS = 'T', and DIAG = 'N' for a triangular routine would +* be specified as OPTS = 'UTN'. +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* N3 (input) INTEGER +* N4 (input) INTEGER +* Problem dimensions for the subroutine NAME; these may not all +* be required. +* +* Further Details +* =============== +* +* The following conventions have been used when calling ILAENV from the +* LAPACK routines: +* 1) OPTS is a concatenation of all of the character options to +* subroutine NAME, in the same order that they appear in the +* argument list for NAME, even if they are not used in determining +* the value of the parameter specified by ISPEC. +* 2) The problem dimensions N1, N2, N3, N4 are specified in the order +* that they appear in the argument list for NAME. N1 is used +* first, N2 second, and so on, and unused problem dimensions are +* passed a value of -1. +* 3) The parameter value returned by ILAENV is checked for validity in +* the calling subroutine. For example, ILAENV is used to retrieve +* the optimal blocksize for STRTRI as follows: +* +* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) +* IF( NB.LE.1 ) NB = MAX( 1, N ) +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IZ, NB, NBMIN, NX + LOGICAL CNAME, SNAME + CHARACTER C1*1, C2*2, C4*2, C3*3, SUBNAM*6 +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, INT, MIN, REAL +* .. +* .. External Functions .. + INTEGER IEEECK, IPARMQ + EXTERNAL IEEECK, IPARMQ +* .. +* .. Executable Statements .. +* + GO TO ( 10, 10, 10, 80, 90, 100, 110, 120, + $ 130, 140, 150, 160, 160, 160, 160, 160 )ISPEC +* +* Invalid value for ISPEC +* + ILAENV = -1 + RETURN +* + 10 CONTINUE +* +* Convert NAME to upper case if the first character is lower case. +* + ILAENV = 1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 20 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 20 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO 30 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + 30 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 40 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 40 CONTINUE + END IF + END IF +* + C1 = SUBNAM( 1: 1 ) + SNAME = C1.EQ.'S' .OR. C1.EQ.'D' + CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' + IF( .NOT.( CNAME .OR. SNAME ) ) + $ RETURN + C2 = SUBNAM( 2: 3 ) + C3 = SUBNAM( 4: 6 ) + C4 = C3( 2: 3 ) +* + GO TO ( 50, 60, 70 )ISPEC +* + 50 CONTINUE +* +* ISPEC = 1: block size +* +* In these examples, separate code is provided for setting NB for +* real and complex. We assume that NB will take the same value in +* single or double precision. +* + NB = 1 +* + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'PO' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( SNAME .AND. C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRF' ) THEN + NB = 64 + ELSE IF( C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + END IF + ELSE IF( C2.EQ.'GB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'PB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'TR' ) THEN + IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'LA' ) THEN + IF( C3.EQ.'UUM' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN + IF( C3.EQ.'EBZ' ) THEN + NB = 1 + END IF + END IF + ILAENV = NB + RETURN +* + 60 CONTINUE +* +* ISPEC = 2: minimum block size +* + NBMIN = 2 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. C3.EQ. + $ 'QLF' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NBMIN = 8 + ELSE + NBMIN = 8 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + END IF + END IF + ILAENV = NBMIN + RETURN +* + 70 CONTINUE +* +* ISPEC = 3: crossover point +* + NX = 0 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. C3.EQ. + $ 'QLF' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NX = 128 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NX = 128 + END IF + END IF + END IF + ILAENV = NX + RETURN +* + 80 CONTINUE +* +* ISPEC = 4: number of shifts (used by xHSEQR) +* + ILAENV = 6 + RETURN +* + 90 CONTINUE +* +* ISPEC = 5: minimum column dimension (not used) +* + ILAENV = 2 + RETURN +* + 100 CONTINUE +* +* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) +* + ILAENV = INT( REAL( MIN( N1, N2 ) )*1.6E0 ) + RETURN +* + 110 CONTINUE +* +* ISPEC = 7: number of processors (not used) +* + ILAENV = 1 + RETURN +* + 120 CONTINUE +* +* ISPEC = 8: crossover point for multishift (used by xHSEQR) +* + ILAENV = 50 + RETURN +* + 130 CONTINUE +* +* ISPEC = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* + ILAENV = 25 + RETURN +* + 140 CONTINUE +* +* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap +* +* ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 1, 0.0, 1.0 ) + END IF + RETURN +* + 150 CONTINUE +* +* ISPEC = 11: infinity arithmetic can be trusted not to trap +* +* ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 0, 0.0, 1.0 ) + END IF + RETURN +* + 160 CONTINUE +* +* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. +* + ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) + RETURN +* +* End of ILAENV +* + END diff --git a/lib/linalg/iparmq.f b/lib/linalg/iparmq.f new file mode 100644 index 0000000000..9a76099bb3 --- /dev/null +++ b/lib/linalg/iparmq.f @@ -0,0 +1,254 @@ + INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, ISPEC, LWORK, N + CHARACTER NAME*( * ), OPTS*( * ) +* +* Purpose +* ======= +* +* This program sets problem and machine dependent parameters +* useful for xHSEQR and its subroutines. It is called whenever +* ILAENV is called with 12 <= ISPEC <= 16 +* +* Arguments +* ========= +* +* ISPEC (input) integer scalar +* ISPEC specifies which tunable parameter IPARMQ should +* return. +* +* ISPEC=12: (INMIN) Matrices of order nmin or less +* are sent directly to xLAHQR, the implicit +* double shift QR algorithm. NMIN must be +* at least 11. +* +* ISPEC=13: (INWIN) Size of the deflation window. +* This is best set greater than or equal to +* the number of simultaneous shifts NS. +* Larger matrices benefit from larger deflation +* windows. +* +* ISPEC=14: (INIBL) Determines when to stop nibbling and +* invest in an (expensive) multi-shift QR sweep. +* If the aggressive early deflation subroutine +* finds LD converged eigenvalues from an order +* NW deflation window and LD.GT.(NW*NIBBLE)/100, +* then the next QR sweep is skipped and early +* deflation is applied immediately to the +* remaining active diagonal block. Setting +* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a +* multi-shift QR sweep whenever early deflation +* finds a converged eigenvalue. Setting +* IPARMQ(ISPEC=14) greater than or equal to 100 +* prevents TTQRE from skipping a multi-shift +* QR sweep. +* +* ISPEC=15: (NSHFTS) The number of simultaneous shifts in +* a multi-shift QR iteration. +* +* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the +* following meanings. +* 0: During the multi-shift QR sweep, +* xLAQR5 does not accumulate reflections and +* does not use matrix-matrix multiply to +* update the far-from-diagonal matrix +* entries. +* 1: During the multi-shift QR sweep, +* xLAQR5 and/or xLAQRaccumulates reflections and uses +* matrix-matrix multiply to update the +* far-from-diagonal matrix entries. +* 2: During the multi-shift QR sweep. +* xLAQR5 accumulates reflections and takes +* advantage of 2-by-2 block structure during +* matrix-matrix multiplies. +* (If xTRMM is slower than xGEMM, then +* IPARMQ(ISPEC=16)=1 may be more efficient than +* IPARMQ(ISPEC=16)=2 despite the greater level of +* arithmetic work implied by the latter choice.) +* +* NAME (input) character string +* Name of the calling subroutine +* +* OPTS (input) character string +* This is a concatenation of the string arguments to +* TTQRE. +* +* N (input) integer scalar +* N is the order of the Hessenberg matrix H. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular +* in rows and columns 1:ILO-1 and IHI+1:N. +* +* LWORK (input) integer scalar +* The amount of workspace available. +* +* Further Details +* =============== +* +* Little is known about how best to choose these parameters. +* It is possible to use different values of the parameters +* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. +* +* It is probably best to choose different parameters for +* different matrices and different parameters at different +* times during the iteration, but this has not been +* implemented --- yet. +* +* +* The best choices of most of the parameters depend +* in an ill-understood way on the relative execution +* rate of xLAQR3 and xLAQR5 and on the nature of each +* particular eigenvalue problem. Experiment may be the +* only practical way to determine which choices are most +* effective. +* +* Following is a list of default values supplied by IPARMQ. +* These defaults may be adjusted in order to attain better +* performance in any particular computational environment. +* +* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. +* Default: 75. (Must be at least 11.) +* +* IPARMQ(ISPEC=13) Recommended deflation window size. +* This depends on ILO, IHI and NS, the +* number of simultaneous shifts returned +* by IPARMQ(ISPEC=15). The default for +* (IHI-ILO+1).LE.500 is NS. The default +* for (IHI-ILO+1).GT.500 is 3*NS/2. +* +* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. +* +* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. +* a multi-shift QR iteration. +* +* If IHI-ILO+1 is ... +* +* greater than ...but less ... the +* or equal to ... than default is +* +* 0 30 NS = 2+ +* 30 60 NS = 4+ +* 60 150 NS = 10 +* 150 590 NS = ** +* 590 3000 NS = 64 +* 3000 6000 NS = 128 +* 6000 infinity NS = 256 +* +* (+) By default matrices of this order are +* passed to the implicit double shift routine +* xLAHQR. See IPARMQ(ISPEC=12) above. These +* values of NS are used only in case of a rare +* xLAHQR failure. +* +* (**) The asterisks (**) indicate an ad-hoc +* function increasing from 10 to 64. +* +* IPARMQ(ISPEC=16) Select structured matrix multiply. +* (See ISPEC=16 above for details.) +* Default: 3. +* +* ================================================================ +* .. Parameters .. + INTEGER INMIN, INWIN, INIBL, ISHFTS, IACC22 + PARAMETER ( INMIN = 12, INWIN = 13, INIBL = 14, + $ ISHFTS = 15, IACC22 = 16 ) + INTEGER NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP + PARAMETER ( NMIN = 75, K22MIN = 14, KACMIN = 14, + $ NIBBLE = 14, KNWSWP = 500 ) + REAL TWO + PARAMETER ( TWO = 2.0 ) +* .. +* .. Local Scalars .. + INTEGER NH, NS +* .. +* .. Intrinsic Functions .. + INTRINSIC LOG, MAX, MOD, NINT, REAL +* .. +* .. Executable Statements .. + IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR. + $ ( ISPEC.EQ.IACC22 ) ) THEN +* +* ==== Set the number simultaneous shifts ==== +* + NH = IHI - ILO + 1 + NS = 2 + IF( NH.GE.30 ) + $ NS = 4 + IF( NH.GE.60 ) + $ NS = 10 + IF( NH.GE.150 ) + $ NS = MAX( 10, NH / NINT( LOG( REAL( NH ) ) / LOG( TWO ) ) ) + IF( NH.GE.590 ) + $ NS = 64 + IF( NH.GE.3000 ) + $ NS = 128 + IF( NH.GE.6000 ) + $ NS = 256 + NS = MAX( 2, NS-MOD( NS, 2 ) ) + END IF +* + IF( ISPEC.EQ.INMIN ) THEN +* +* +* ===== Matrices of order smaller than NMIN get sent +* . to xLAHQR, the classic double shift algorithm. +* . This must be at least 11. ==== +* + IPARMQ = NMIN +* + ELSE IF( ISPEC.EQ.INIBL ) THEN +* +* ==== INIBL: skip a multi-shift qr iteration and +* . whenever aggressive early deflation finds +* . at least (NIBBLE*(window size)/100) deflations. ==== +* + IPARMQ = NIBBLE +* + ELSE IF( ISPEC.EQ.ISHFTS ) THEN +* +* ==== NSHFTS: The number of simultaneous shifts ===== +* + IPARMQ = NS +* + ELSE IF( ISPEC.EQ.INWIN ) THEN +* +* ==== NW: deflation window size. ==== +* + IF( NH.LE.KNWSWP ) THEN + IPARMQ = NS + ELSE + IPARMQ = 3*NS / 2 + END IF +* + ELSE IF( ISPEC.EQ.IACC22 ) THEN +* +* ==== IACC22: Whether to accumulate reflections +* . before updating the far-from-diagonal elements +* . and whether to use 2-by-2 block structure while +* . doing it. A small amount of work could be saved +* . by making this choice dependent also upon the +* . NH=IHI-ILO+1. +* + IPARMQ = 0 + IF( NS.GE.KACMIN ) + $ IPARMQ = 1 + IF( NS.GE.K22MIN ) + $ IPARMQ = 2 +* + ELSE +* ===== invalid value of ispec ===== + IPARMQ = -1 +* + END IF +* +* ==== End of IPARMQ ==== +* + END diff --git a/lib/linalg/lsame.f b/lib/linalg/lsame.f new file mode 100644 index 0000000000..5e9047e14e --- /dev/null +++ b/lib/linalg/lsame.f @@ -0,0 +1,86 @@ + LOGICAL FUNCTION LSAME( CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.2) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER CA, CB +* .. +* +* Purpose +* ======= +* +* LSAME returns .TRUE. if CA is the same letter as CB regardless of +* case. +* +* Arguments +* ========= +* +* CA (input) CHARACTER*1 +* CB (input) CHARACTER*1 +* CA and CB specify the single characters to be compared. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Local Scalars .. + INTEGER INTA, INTB, ZCODE +* .. +* .. Executable Statements .. +* +* Test if the characters are equal +* + LSAME = CA.EQ.CB + IF( LSAME ) + $ RETURN +* +* Now test for equivalence if both characters are alphabetic. +* + ZCODE = ICHAR( 'Z' ) +* +* Use 'Z' rather than 'A' so that ASCII can be detected on Prime +* machines, on which ICHAR returns a value with bit 8 set. +* ICHAR('A') on Prime machines returns 193 which is the same as +* ICHAR('A') on an EBCDIC machine. +* + INTA = ICHAR( CA ) + INTB = ICHAR( CB ) +* + IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN +* +* ASCII is assumed - ZCODE is the ASCII code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 + IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 +* + ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN +* +* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.129 .AND. INTA.LE.137 .OR. + $ INTA.GE.145 .AND. INTA.LE.153 .OR. + $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 + IF( INTB.GE.129 .AND. INTB.LE.137 .OR. + $ INTB.GE.145 .AND. INTB.LE.153 .OR. + $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 +* + ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN +* +* ASCII is assumed, on Prime machines - ZCODE is the ASCII code +* plus 128 of either lower or upper case 'Z'. +* + IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 + IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 + END IF + LSAME = INTA.EQ.INTB +* +* RETURN +* +* End of LSAME +* + END diff --git a/lib/linalg/xerbla.f b/lib/linalg/xerbla.f new file mode 100644 index 0000000000..3a84150e32 --- /dev/null +++ b/lib/linalg/xerbla.f @@ -0,0 +1,48 @@ + SUBROUTINE XERBLA( SRNAME, INFO ) +* +* -- LAPACK auxiliary routine (preliminary version) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER*(*) SRNAME + INTEGER INFO +* .. +* +* Purpose +* ======= +* +* XERBLA is an error handler for the LAPACK routines. +* It is called by an LAPACK routine if an input parameter has an +* invalid value. A message is printed and execution stops. +* +* Installers may consider modifying the STOP statement in order to +* call system-specific exception-handling facilities. +* +* Arguments +* ========= +* +* SRNAME (input) CHARACTER*(*) +* The name of the routine which called XERBLA. +* +* INFO (input) INTEGER +* The position of the invalid parameter in the parameter list +* of the calling routine. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LEN_TRIM +* .. +* .. Executable Statements .. +* + WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO +* + STOP +* + 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', + $ 'an illegal value' ) +* +* End of XERBLA +* + END